From 9075421303649490cfd4cdb778faedf1d81734f3 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 24 Jul 2024 12:12:12 +0000 Subject: [PATCH 01/44] Fixed overflow bug for large sizes in thrust::shuffle --- testing/shuffle.cu | 53 +++++++++++++----------- thrust/system/detail/generic/shuffle.inl | 2 +- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/testing/shuffle.cu b/testing/shuffle.cu index 77e660c00..32210d530 100644 --- a/testing/shuffle.cu +++ b/testing/shuffle.cu @@ -1,8 +1,9 @@ #include -#include #include +#include #include +#include #include #include #include @@ -360,7 +361,7 @@ DECLARE_VECTOR_UNITTEST(TestShuffleCopySimple); template void TestHostDeviceIdentical(size_t m) { thrust::host_vector host_result(m); - thrust::host_vector device_result(m); + thrust::device_vector device_result(m); thrust::sequence(host_result.begin(), host_result.end(), T{}); thrust::sequence(device_result.begin(), device_result.end(), T{}); @@ -382,30 +383,34 @@ void TestFunctionIsBijection(size_t m) { thrust::system::detail::generic::feistel_bijection host_f(m, host_g); thrust::system::detail::generic::feistel_bijection device_f(m, device_g); - if (static_cast(host_f.nearest_power_of_two()) >= static_cast(std::numeric_limits::max()) || m == 0) { - return; - } - - thrust::host_vector host_result(host_f.nearest_power_of_two()); - thrust::host_vector device_result(device_f.nearest_power_of_two()); - thrust::sequence(host_result.begin(), host_result.end(), T{}); - thrust::sequence(device_result.begin(), device_result.end(), T{}); - - thrust::transform(host_result.begin(), host_result.end(), host_result.begin(), - host_f); - thrust::transform(device_result.begin(), device_result.end(), - device_result.begin(), device_f); - - ASSERT_EQUAL(host_result, device_result); - - thrust::sort(host_result.begin(), host_result.end()); - // Assert all values were generated exactly once - for (uint64_t i = 0; i < m; i++) { - ASSERT_EQUAL((uint64_t)host_result[i], i); 
+ const size_t total_length = device_f.nearest_power_of_two(); + if(static_cast(total_length) >= static_cast(std::numeric_limits::max()) + || m == 0) + { + return; } + ASSERT_LEQUAL( + total_length, + std::max(m * 2, size_t(16))); // Check the rounded up size is at most double the input + + auto device_result_it + = thrust::make_transform_iterator(thrust::make_counting_iterator(T(0)), device_f); + + thrust::device_vector unpermuted(total_length, T(0)); + + // Run a scatter: this should copy each value to the index matching its value, so the result should be in ascending order + thrust::scatter(device_result_it, + device_result_it + + static_cast(total_length), // total_length is guaranteed to fit T + device_result_it, + unpermuted.begin()); + + // Check every index is in the result, if any are missing then the function was not a bijection over [0,m) + ASSERT_EQUAL( + true, + thrust::equal(unpermuted.begin(), unpermuted.end(), thrust::make_counting_iterator(T(0)))); } -DECLARE_VARIABLE_UNITTEST(TestFunctionIsBijection); - +DECLARE_INTEGRAL_VARIABLE_UNITTEST(TestFunctionIsBijection); void TestBijectionLength() { thrust::default_random_engine g(0xD5); diff --git a/thrust/system/detail/generic/shuffle.inl b/thrust/system/detail/generic/shuffle.inl index baece51be..0deb1f631 100644 --- a/thrust/system/detail/generic/shuffle.inl +++ b/thrust/system/detail/generic/shuffle.inl @@ -69,7 +69,7 @@ class feistel_bijection { state[1] = lo & right_side_mask; } // Combine the left and right sides together to get result - return static_cast(state[0] << right_side_bits) | static_cast(state[1]); + return (static_cast(state[0]) << right_side_bits) | static_cast(state[1]); } private: From e9397cc513dbac15a15aa28a93788a538f298eb9 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 24 Jul 2024 16:31:45 +0000 Subject: [PATCH 02/44] Added definitions of execution space macros --- .clang-format | 114 ++- docs/doxygen/Doxyfile | 2 +- testing/shuffle.cu | 2 +- thrust/addressof.h | 
2 +- thrust/adjacent_difference.h | 4 +- thrust/advance.h | 6 +- thrust/allocate_unique.h | 12 +- thrust/async/copy.h | 10 +- thrust/async/for_each.h | 8 +- thrust/async/reduce.h | 40 +- thrust/async/sort.h | 26 +- thrust/async/transform.h | 8 +- thrust/binary_search.h | 28 +- thrust/complex.h | 160 +-- thrust/copy.h | 18 +- thrust/count.h | 8 +- thrust/detail/adjacent_difference.inl | 8 +- thrust/detail/advance.inl | 8 +- thrust/detail/alignment.h | 4 +- thrust/detail/allocator/allocator_traits.h | 18 +- thrust/detail/allocator/allocator_traits.inl | 94 +- .../detail/allocator/copy_construct_range.h | 4 +- .../detail/allocator/copy_construct_range.inl | 24 +- .../allocator/default_construct_range.h | 2 +- .../allocator/default_construct_range.inl | 10 +- thrust/detail/allocator/destroy_range.h | 2 +- thrust/detail/allocator/destroy_range.inl | 16 +- .../detail/allocator/fill_construct_range.h | 2 +- .../detail/allocator/fill_construct_range.inl | 10 +- thrust/detail/allocator/no_throw_allocator.h | 10 +- thrust/detail/allocator/tagged_allocator.h | 16 +- thrust/detail/allocator/tagged_allocator.inl | 16 +- thrust/detail/allocator/temporary_allocator.h | 10 +- .../detail/allocator/temporary_allocator.inl | 4 +- thrust/detail/binary_search.inl | 56 +- thrust/detail/complex/arithmetic.h | 52 +- thrust/detail/complex/c99math.h | 30 +- thrust/detail/complex/catrig.h | 50 +- thrust/detail/complex/catrigf.h | 36 +- thrust/detail/complex/ccosh.h | 12 +- thrust/detail/complex/ccoshf.h | 8 +- thrust/detail/complex/cexp.h | 12 +- thrust/detail/complex/cexpf.h | 10 +- thrust/detail/complex/clog.h | 12 +- thrust/detail/complex/clogf.h | 8 +- thrust/detail/complex/complex.inl | 60 +- thrust/detail/complex/cpow.h | 8 +- thrust/detail/complex/cproj.h | 10 +- thrust/detail/complex/csinh.h | 12 +- thrust/detail/complex/csinhf.h | 8 +- thrust/detail/complex/csqrt.h | 6 +- thrust/detail/complex/csqrtf.h | 4 +- thrust/detail/complex/ctanh.h | 12 +- thrust/detail/complex/ctanhf.h | 8 
+- thrust/detail/complex/math_private.h | 16 +- thrust/detail/config/config.h | 4 +- thrust/detail/config/cpp_compatibility.h | 4 +- thrust/detail/config/exec_check_disable.h | 41 - thrust/detail/config/execution_space.h | 41 + thrust/detail/config/forceinline.h | 41 - thrust/detail/config/host_device.h | 45 - thrust/detail/contiguous_storage.h | 108 +-- thrust/detail/contiguous_storage.inl | 118 +-- thrust/detail/copy.h | 8 +- thrust/detail/copy.inl | 16 +- thrust/detail/copy_if.h | 4 +- thrust/detail/copy_if.inl | 8 +- thrust/detail/count.h | 4 +- thrust/detail/count.inl | 8 +- .../dependencies_aware_execution_policy.h | 12 +- thrust/detail/device_ptr.inl | 4 +- thrust/detail/distance.inl | 4 +- thrust/detail/equal.inl | 8 +- thrust/detail/event_error.h | 8 +- thrust/detail/execute_with_allocator.h | 8 +- thrust/detail/execute_with_allocator_fwd.h | 18 +- thrust/detail/execute_with_dependencies.h | 46 +- thrust/detail/execution_policy.h | 6 +- thrust/detail/extrema.inl | 24 +- thrust/detail/fill.inl | 12 +- thrust/detail/find.inl | 12 +- thrust/detail/for_each.inl | 8 +- thrust/detail/function.h | 56 +- thrust/detail/functional.inl | 4 +- thrust/detail/functional/actor.h | 14 +- thrust/detail/functional/actor.inl | 8 +- thrust/detail/functional/argument.h | 4 +- thrust/detail/functional/composite.h | 12 +- .../operators/arithmetic_operators.h | 64 +- .../operators/assignment_operator.h | 6 +- .../functional/operators/bitwise_operators.h | 46 +- .../operators/compound_assignment_operators.h | 80 +- .../functional/operators/logical_operators.h | 14 +- .../functional/operators/operator_adaptors.h | 4 +- .../operators/relational_operators.h | 36 +- thrust/detail/functional/value.h | 6 +- thrust/detail/gather.inl | 12 +- thrust/detail/generate.inl | 8 +- thrust/detail/get_iterator_value.h | 4 +- thrust/detail/inner_product.inl | 8 +- thrust/detail/integer_math.h | 20 +- thrust/detail/internal_functional.h | 120 +-- thrust/detail/logical.inl | 12 +- 
thrust/detail/malloc_and_free.h | 14 +- thrust/detail/memory_algorithms.h | 16 +- thrust/detail/merge.inl | 16 +- thrust/detail/minmax.h | 8 +- thrust/detail/mismatch.inl | 8 +- thrust/detail/numeric_traits.h | 2 +- thrust/detail/pair.inl | 38 +- thrust/detail/partition.inl | 40 +- thrust/detail/pointer.h | 38 +- thrust/detail/pointer.inl | 40 +- thrust/detail/range/head_flags.h | 40 +- thrust/detail/range/tail_flags.h | 24 +- thrust/detail/raw_pointer_cast.h | 6 +- thrust/detail/raw_reference_cast.h | 18 +- thrust/detail/reduce.inl | 24 +- thrust/detail/reference.h | 48 +- thrust/detail/remove.inl | 24 +- thrust/detail/replace.inl | 24 +- thrust/detail/reverse.inl | 8 +- thrust/detail/scan.inl | 48 +- thrust/detail/scatter.inl | 12 +- thrust/detail/select_system.h | 8 +- thrust/detail/seq.h | 6 +- thrust/detail/sequence.inl | 12 +- thrust/detail/set_operations.inl | 64 +- thrust/detail/shuffle.inl | 12 +- thrust/detail/sort.inl | 50 +- thrust/detail/static_map.h | 8 +- thrust/detail/swap.h | 4 +- thrust/detail/swap_ranges.inl | 4 +- thrust/detail/tabulate.inl | 4 +- thrust/detail/temporary_array.h | 16 +- thrust/detail/temporary_array.inl | 20 +- thrust/detail/temporary_buffer.h | 10 +- thrust/detail/transform.inl | 20 +- thrust/detail/transform_reduce.inl | 4 +- thrust/detail/transform_scan.inl | 8 +- thrust/detail/trivial_sequence.h | 14 +- thrust/detail/tuple.inl | 172 ++-- thrust/detail/tuple_transform.h | 6 +- thrust/detail/type_traits.h | 6 +- thrust/detail/type_traits/pointer_traits.h | 20 +- thrust/detail/uninitialized_copy.inl | 8 +- thrust/detail/uninitialized_fill.inl | 8 +- thrust/detail/unique.inl | 48 +- thrust/detail/util/align.h | 6 +- thrust/detail/vector_base.h | 48 +- thrust/detail/vector_base.inl | 48 +- thrust/device_allocator.h | 16 +- thrust/device_make_unique.h | 2 +- thrust/device_malloc_allocator.h | 20 +- thrust/device_new_allocator.h | 22 +- thrust/device_ptr.h | 22 +- thrust/device_reference.h | 16 +- thrust/distance.h | 2 +- 
thrust/equal.h | 8 +- thrust/execution_policy.h | 8 +- thrust/extrema.h | 36 +- thrust/fill.h | 8 +- thrust/find.h | 22 +- thrust/for_each.h | 20 +- thrust/functional.h | 128 +-- thrust/future.h | 4 +- thrust/gather.h | 10 +- thrust/generate.h | 4 +- thrust/host_vector.h | 42 +- thrust/inner_product.h | 4 +- thrust/iterator/constant_iterator.h | 22 +- thrust/iterator/counting_iterator.h | 16 +- thrust/iterator/detail/any_assign.h | 6 +- thrust/iterator/detail/counting_iterator.inl | 8 +- thrust/iterator/detail/join_iterator.h | 8 +- thrust/iterator/detail/normal_iterator.h | 8 +- thrust/iterator/detail/retag.h | 16 +- thrust/iterator/detail/reverse_iterator.inl | 20 +- thrust/iterator/detail/tagged_iterator.h | 4 +- .../transform_input_output_iterator.inl | 16 +- .../detail/transform_output_iterator.inl | 6 +- .../detail/tuple_of_iterator_references.h | 44 +- thrust/iterator/detail/zip_iterator.inl | 28 +- thrust/iterator/detail/zip_iterator_base.h | 28 +- thrust/iterator/discard_iterator.h | 8 +- thrust/iterator/iterator_adaptor.h | 38 +- thrust/iterator/iterator_facade.h | 78 +- thrust/iterator/permutation_iterator.h | 10 +- thrust/iterator/retag.h | 4 +- thrust/iterator/reverse_iterator.h | 20 +- .../transform_input_output_iterator.h | 6 +- thrust/iterator/transform_iterator.h | 26 +- thrust/iterator/transform_output_iterator.h | 8 +- thrust/iterator/zip_iterator.h | 24 +- thrust/logical.h | 6 +- thrust/memory.h | 26 +- thrust/merge.h | 8 +- thrust/mismatch.h | 4 +- thrust/mr/allocator.h | 28 +- thrust/mr/disjoint_pool.h | 18 +- thrust/mr/disjoint_tls_pool.h | 2 +- thrust/mr/memory_resource.h | 14 +- thrust/mr/polymorphic_adaptor.h | 2 +- thrust/mr/tls_pool.h | 2 +- thrust/optional.h | 917 +++++++++--------- thrust/pair.h | 26 +- thrust/partition.h | 52 +- thrust/per_device_resource.h | 10 +- thrust/random/detail/discard_block_engine.inl | 22 +- .../detail/linear_congruential_engine.inl | 14 +- .../linear_congruential_engine_discard.h | 6 +- 
.../detail/linear_feedback_shift_engine.inl | 14 +- thrust/random/detail/mod.h | 6 +- thrust/random/detail/normal_distribution.inl | 28 +- .../random/detail/normal_distribution_base.h | 8 +- thrust/random/detail/random_core_access.h | 2 +- .../detail/subtract_with_carry_engine.inl | 14 +- .../detail/uniform_int_distribution.inl | 28 +- .../detail/uniform_real_distribution.inl | 28 +- thrust/random/detail/xor_combine_engine.inl | 24 +- thrust/random/discard_block_engine.h | 22 +- thrust/random/linear_congruential_engine.h | 14 +- thrust/random/linear_feedback_shift_engine.h | 14 +- thrust/random/normal_distribution.h | 28 +- thrust/random/subtract_with_carry_engine.h | 14 +- thrust/random/uniform_int_distribution.h | 28 +- thrust/random/uniform_real_distribution.h | 28 +- thrust/random/xor_combine_engine.h | 24 +- thrust/reduce.h | 12 +- thrust/remove.h | 20 +- thrust/replace.h | 28 +- thrust/reverse.h | 4 +- thrust/scan.h | 24 +- thrust/scatter.h | 10 +- thrust/sequence.h | 6 +- thrust/set_operations.h | 32 +- thrust/shuffle.h | 8 +- thrust/sort.h | 26 +- thrust/swap.h | 4 +- thrust/system/cpp/detail/par.h | 2 +- thrust/system/cpp/execution_policy.h | 2 +- thrust/system/cuda/config.h | 11 +- .../system/cuda/detail/adjacent_difference.h | 8 +- thrust/system/cuda/detail/assign_value.h | 12 +- thrust/system/cuda/detail/async/for_each.h | 4 +- thrust/system/cuda/detail/async/transform.h | 4 +- thrust/system/cuda/detail/copy.h | 24 +- thrust/system/cuda/detail/copy_if.h | 12 +- .../system/cuda/detail/core/agent_launcher.h | 1 - .../cuda/detail/core/triple_chevron_launch.h | 23 +- thrust/system/cuda/detail/core/util.h | 60 +- thrust/system/cuda/detail/count.h | 4 +- thrust/system/cuda/detail/cross_system.h | 63 +- thrust/system/cuda/detail/equal.h | 4 +- thrust/system/cuda/detail/error.inl | 1 - thrust/system/cuda/detail/extrema.h | 32 +- thrust/system/cuda/detail/fill.h | 4 +- thrust/system/cuda/detail/find.h | 14 +- thrust/system/cuda/detail/future.inl | 266 ++--- 
thrust/system/cuda/detail/gather.h | 6 +- thrust/system/cuda/detail/generate.h | 4 +- thrust/system/cuda/detail/get_value.h | 8 +- .../cuda/detail/guarded_cuda_runtime_api.h | 39 - .../system/cuda/detail/guarded_driver_types.h | 63 -- thrust/system/cuda/detail/inner_product.h | 4 +- .../cuda/detail/internal/copy_cross_system.h | 10 +- thrust/system/cuda/detail/iter_swap.h | 6 +- thrust/system/cuda/detail/malloc_and_free.h | 15 +- thrust/system/cuda/detail/memory.inl | 6 +- thrust/system/cuda/detail/merge.h | 12 +- thrust/system/cuda/detail/mismatch.h | 8 +- thrust/system/cuda/detail/par.h | 39 +- thrust/system/cuda/detail/par_to_seq.h | 6 +- thrust/system/cuda/detail/parallel_for.h | 4 +- thrust/system/cuda/detail/partition.h | 34 +- .../system/cuda/detail/per_device_resource.h | 2 +- thrust/system/cuda/detail/reduce.h | 12 +- thrust/system/cuda/detail/reduce_by_key.h | 10 +- thrust/system/cuda/detail/remove.h | 12 +- thrust/system/cuda/detail/replace.h | 12 +- thrust/system/cuda/detail/reverse.h | 8 +- thrust/system/cuda/detail/scan.h | 26 +- thrust/system/cuda/detail/scan_by_key.h | 26 +- thrust/system/cuda/detail/scatter.h | 6 +- thrust/system/cuda/detail/set_operations.h | 48 +- thrust/system/cuda/detail/sort.h | 24 +- thrust/system/cuda/detail/swap_ranges.h | 2 +- thrust/system/cuda/detail/tabulate.h | 6 +- thrust/system/cuda/detail/terminate.h | 4 +- thrust/system/cuda/detail/transform_reduce.h | 2 +- thrust/system/cuda/detail/transform_scan.h | 4 +- .../system/cuda/detail/uninitialized_copy.h | 4 +- .../system/cuda/detail/uninitialized_fill.h | 4 +- thrust/system/cuda/detail/unique.h | 24 +- thrust/system/cuda/detail/unique_by_key.h | 14 +- thrust/system/cuda/detail/util.h | 116 +-- thrust/system/cuda/error.h | 1 - thrust/system/cuda/future.h | 6 +- thrust/system/cuda/memory.h | 6 +- thrust/system/cuda/memory_resource.h | 1 - .../detail/generic/adjacent_difference.h | 4 +- .../detail/generic/adjacent_difference.inl | 4 +- 
thrust/system/detail/generic/advance.h | 2 +- thrust/system/detail/generic/advance.inl | 10 +- thrust/system/detail/generic/binary_search.h | 28 +- .../system/detail/generic/binary_search.inl | 44 +- thrust/system/detail/generic/copy.h | 4 +- thrust/system/detail/generic/copy.inl | 4 +- thrust/system/detail/generic/copy_if.h | 4 +- thrust/system/detail/generic/copy_if.inl | 6 +- thrust/system/detail/generic/count.h | 4 +- thrust/system/detail/generic/count.inl | 10 +- thrust/system/detail/generic/distance.h | 2 +- thrust/system/detail/generic/distance.inl | 12 +- thrust/system/detail/generic/equal.h | 4 +- thrust/system/detail/generic/equal.inl | 8 +- thrust/system/detail/generic/extrema.h | 12 +- thrust/system/detail/generic/extrema.inl | 26 +- thrust/system/detail/generic/fill.h | 4 +- thrust/system/detail/generic/find.h | 6 +- thrust/system/detail/generic/find.inl | 8 +- thrust/system/detail/generic/for_each.h | 6 +- thrust/system/detail/generic/gather.h | 6 +- thrust/system/detail/generic/gather.inl | 6 +- thrust/system/detail/generic/generate.h | 4 +- thrust/system/detail/generic/generate.inl | 4 +- thrust/system/detail/generic/inner_product.h | 4 +- .../system/detail/generic/inner_product.inl | 4 +- thrust/system/detail/generic/logical.h | 6 +- thrust/system/detail/generic/memory.h | 12 +- thrust/system/detail/generic/memory.inl | 12 +- thrust/system/detail/generic/merge.h | 8 +- thrust/system/detail/generic/merge.inl | 8 +- thrust/system/detail/generic/mismatch.h | 4 +- thrust/system/detail/generic/mismatch.inl | 4 +- thrust/system/detail/generic/partition.h | 20 +- thrust/system/detail/generic/partition.inl | 20 +- .../detail/generic/per_device_resource.h | 2 +- thrust/system/detail/generic/reduce.h | 6 +- thrust/system/detail/generic/reduce.inl | 6 +- thrust/system/detail/generic/reduce_by_key.h | 6 +- .../system/detail/generic/reduce_by_key.inl | 10 +- thrust/system/detail/generic/remove.h | 12 +- thrust/system/detail/generic/remove.inl | 12 +- 
thrust/system/detail/generic/replace.h | 12 +- thrust/system/detail/generic/replace.inl | 22 +- thrust/system/detail/generic/reverse.h | 4 +- thrust/system/detail/generic/reverse.inl | 4 +- .../detail/generic/scalar/binary_search.h | 12 +- .../detail/generic/scalar/binary_search.inl | 12 +- thrust/system/detail/generic/scan.h | 10 +- thrust/system/detail/generic/scan.inl | 10 +- thrust/system/detail/generic/scan_by_key.h | 14 +- thrust/system/detail/generic/scan_by_key.inl | 18 +- thrust/system/detail/generic/scatter.h | 6 +- thrust/system/detail/generic/scatter.inl | 6 +- thrust/system/detail/generic/select_system.h | 14 +- .../system/detail/generic/select_system.inl | 20 +- thrust/system/detail/generic/sequence.h | 6 +- thrust/system/detail/generic/sequence.inl | 14 +- thrust/system/detail/generic/set_operations.h | 32 +- .../system/detail/generic/set_operations.inl | 32 +- thrust/system/detail/generic/shuffle.h | 4 +- thrust/system/detail/generic/shuffle.inl | 24 +- thrust/system/detail/generic/sort.h | 24 +- thrust/system/detail/generic/sort.inl | 24 +- thrust/system/detail/generic/swap_ranges.h | 2 +- thrust/system/detail/generic/swap_ranges.inl | 4 +- thrust/system/detail/generic/tabulate.h | 2 +- thrust/system/detail/generic/tabulate.inl | 2 +- thrust/system/detail/generic/tag.h | 2 +- .../system/detail/generic/temporary_buffer.h | 10 +- .../detail/generic/temporary_buffer.inl | 10 +- thrust/system/detail/generic/transform.h | 10 +- thrust/system/detail/generic/transform.inl | 10 +- .../system/detail/generic/transform_reduce.h | 2 +- .../detail/generic/transform_reduce.inl | 2 +- thrust/system/detail/generic/transform_scan.h | 4 +- .../system/detail/generic/transform_scan.inl | 4 +- .../detail/generic/uninitialized_copy.h | 4 +- .../detail/generic/uninitialized_copy.inl | 14 +- .../detail/generic/uninitialized_fill.h | 4 +- .../detail/generic/uninitialized_fill.inl | 12 +- thrust/system/detail/generic/unique.h | 12 +- thrust/system/detail/generic/unique.inl 
| 12 +- thrust/system/detail/generic/unique_by_key.h | 8 +- .../system/detail/generic/unique_by_key.inl | 8 +- thrust/system/detail/internal/decompose.h | 14 +- .../detail/sequential/adjacent_difference.h | 4 +- .../system/detail/sequential/assign_value.h | 2 +- .../system/detail/sequential/binary_search.h | 12 +- thrust/system/detail/sequential/copy.h | 4 +- thrust/system/detail/sequential/copy.inl | 26 +- .../system/detail/sequential/copy_backward.h | 4 +- thrust/system/detail/sequential/copy_if.h | 4 +- .../detail/sequential/execution_policy.h | 4 +- thrust/system/detail/sequential/extrema.h | 12 +- thrust/system/detail/sequential/find.h | 4 +- thrust/system/detail/sequential/for_each.h | 6 +- .../system/detail/sequential/general_copy.h | 16 +- thrust/system/detail/sequential/get_value.h | 2 +- .../system/detail/sequential/insertion_sort.h | 8 +- thrust/system/detail/sequential/iter_swap.h | 2 +- .../detail/sequential/malloc_and_free.h | 4 +- thrust/system/detail/sequential/merge.h | 4 +- thrust/system/detail/sequential/merge.inl | 8 +- thrust/system/detail/sequential/partition.h | 28 +- thrust/system/detail/sequential/reduce.h | 4 +- .../system/detail/sequential/reduce_by_key.h | 4 +- thrust/system/detail/sequential/remove.h | 16 +- thrust/system/detail/sequential/scan.h | 8 +- thrust/system/detail/sequential/scan_by_key.h | 8 +- .../system/detail/sequential/set_operations.h | 16 +- thrust/system/detail/sequential/sort.h | 4 +- thrust/system/detail/sequential/sort.inl | 14 +- .../detail/sequential/stable_merge_sort.h | 4 +- .../detail/sequential/stable_merge_sort.inl | 26 +- .../detail/sequential/stable_primitive_sort.h | 4 +- .../sequential/stable_primitive_sort.inl | 14 +- .../detail/sequential/stable_radix_sort.h | 4 +- .../detail/sequential/stable_radix_sort.inl | 50 +- .../system/detail/sequential/trivial_copy.h | 2 +- thrust/system/detail/sequential/unique.h | 8 +- .../system/detail/sequential/unique_by_key.h | 6 +- thrust/system/hip/config.h | 12 +- 
.../system/hip/detail/adjacent_difference.h | 6 +- thrust/system/hip/detail/async/for_each.h | 4 +- thrust/system/hip/detail/async/transform.h | 4 +- thrust/system/hip/detail/binary_search.h | 24 +- thrust/system/hip/detail/copy.h | 16 +- thrust/system/hip/detail/copy_if.h | 12 +- thrust/system/hip/detail/cross_system.h | 54 +- thrust/system/hip/detail/extrema.h | 6 +- thrust/system/hip/detail/fill.h | 2 +- thrust/system/hip/detail/future.inl | 260 ++--- thrust/system/hip/detail/general/various.h | 8 +- .../system/hip/detail/guarded_driver_types.h | 18 +- .../hip/detail/guarded_hip_runtime_api.h | 14 +- .../hip/detail/internal/copy_cross_system.h | 10 +- thrust/system/hip/detail/malloc_and_free.h | 4 +- thrust/system/hip/detail/merge.h | 14 +- thrust/system/hip/detail/par.h | 54 +- thrust/system/hip/detail/parallel_for.h | 6 +- thrust/system/hip/detail/partition.h | 48 +- .../system/hip/detail/per_device_resource.h | 4 +- thrust/system/hip/detail/reduce.h | 6 +- thrust/system/hip/detail/reduce_by_key.h | 12 +- thrust/system/hip/detail/reverse.h | 4 +- thrust/system/hip/detail/scan.h | 10 +- thrust/system/hip/detail/scan_by_key.h | 12 +- thrust/system/hip/detail/set_operations.h | 2 +- thrust/system/hip/detail/sort.h | 16 +- thrust/system/hip/detail/tabulate.h | 4 +- thrust/system/hip/detail/unique.h | 20 +- thrust/system/hip/detail/unique_by_key.h | 14 +- thrust/system/hip/detail/util.h | 36 +- thrust/system/hip/future.h | 6 +- thrust/system/hip/memory.h | 8 +- thrust/system/omp/detail/execution_policy.h | 4 +- thrust/system/omp/detail/par.h | 2 +- thrust/system/omp/execution_policy.h | 2 +- thrust/system/tbb/detail/par.h | 2 +- thrust/system/tbb/execution_policy.h | 2 +- thrust/tabulate.h | 2 +- thrust/transform.h | 14 +- thrust/transform_reduce.h | 6 +- thrust/transform_scan.h | 4 +- thrust/tuple.h | 104 +- thrust/type_traits/integer_sequence.h | 2 +- thrust/type_traits/is_contiguous_iterator.h | 8 +- thrust/type_traits/is_trivially_relocatable.h | 2 - 
thrust/uninitialized_copy.h | 12 +- thrust/uninitialized_fill.h | 12 +- thrust/unique.h | 24 +- thrust/zip_function.h | 20 +- 475 files changed, 4577 insertions(+), 4705 deletions(-) delete mode 100644 thrust/detail/config/exec_check_disable.h create mode 100644 thrust/detail/config/execution_space.h delete mode 100644 thrust/detail/config/forceinline.h delete mode 100644 thrust/detail/config/host_device.h delete mode 100644 thrust/system/cuda/detail/guarded_cuda_runtime_api.h delete mode 100644 thrust/system/cuda/detail/guarded_driver_types.h diff --git a/.clang-format b/.clang-format index 4082344a4..d3b0fef7e 100644 --- a/.clang-format +++ b/.clang-format @@ -1,11 +1,11 @@ # Style file for MLSE Libraries based on the modified rocBLAS style # Common settings -BasedOnStyle: WebKit -TabWidth: 4 -IndentWidth: 4 +BasedOnStyle: LLVM +TabWidth: 2 +IndentWidth: 2 UseTab: Never -ColumnLimit: 100 +ColumnLimit: 120 # Other languages JavaScript, Proto @@ -20,14 +20,14 @@ Language: Cpp # void formatted_code_again; DisableFormat: false -Standard: Cpp11 - -AccessModifierOffset: -4 +Standard: c++14 +AccessModifierOffset: -2 AlignAfterOpenBracket: true AlignConsecutiveAssignments: true AlignConsecutiveDeclarations: true AlignEscapedNewlinesLeft: true AlignOperands: true +AllowAllArgumentsOnNextLine: true AlignTrailingComments: false AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false @@ -39,6 +39,18 @@ AlwaysBreakAfterDefinitionReturnType: false AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: true +AttributeMacros: [ + 'THRUST_DEVICE', + 'THRUST_FORCEINLINE', + 'THRUST_HOST_DEVICE', + 'THRUST_HOST', + '_CCCL_DEVICE', + '_CCCL_FORCEINLINE', + '_CCCL_HOST_DEVICE', + '_CCCL_HOST', + 'THRUST_RUNTIME_FUNCTION', + 'THRUST_DETAIL_KERNEL_ATTRIBUTES', + ] BinPackArguments: false BinPackParameters: false @@ -46,6 +58,7 @@ BinPackParameters: false BreakBeforeBraces: Custom # Control of individual 
brace wrapping cases BraceWrapping: { + AfterCaseLabel: 'false' AfterClass: 'true' AfterControlStatement: 'true' AfterEnum : 'true' @@ -56,52 +69,69 @@ BraceWrapping: { BeforeCatch : 'true' BeforeElse : 'true' IndentBraces : 'false' -# AfterExternBlock : 'true' + SplitEmptyFunction: 'false' + SplitEmptyRecord: 'false' } -#BreakAfterJavaFieldAnnotations: true -#BreakBeforeInheritanceComma: false -#BreakBeforeBinaryOperators: None -#BreakBeforeTernaryOperators: true -#BreakConstructorInitializersBeforeComma: true -#BreakStringLiterals: true +BreakBeforeConceptDeclarations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeComma +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Always + +InsertBraces: true +InsertNewlineAtEOF: true +InsertTrailingCommas: Wrapped +IndentRequires: true +IndentPPDirectives: AfterHash +PackConstructorInitializers: Never +PenaltyBreakAssignment: 30 +PenaltyBreakTemplateDeclaration: 0 +PenaltyIndentedWhitespace: 2 +RemoveSemicolon: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeRangeBasedForLoopColon: true + CommentPragmas: '^ IWYU pragma:' -#CompactNamespaces: false +CompactNamespaces: false ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 +ContinuationIndentWidth: 2 Cpp11BracedListStyle: true -#SpaceBeforeCpp11BracedList: false -DerivePointerAlignment: false +SpaceBeforeCpp11BracedList: false ExperimentalAutoDetectBinPacking: false ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] -IndentCaseLabels: false -#FixNamespaceComments: true +IndentCaseLabels: true +FixNamespaceComments: true IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLinesAtTheStartOfBlocks: false MacroBlockBegin: '' MacroBlockEnd: '' 
#JavaScriptQuotes: Double MaxEmptyLinesToKeep: 1 -NamespaceIndentation: Inner +NamespaceIndentation: None ObjCBlockIndentWidth: 4 #ObjCSpaceAfterProperty: true #ObjCSpaceBeforeProtocolList: true -PenaltyBreakBeforeFirstCallParameter: 19 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 - -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyBreakBeforeFirstCallParameter: 50 +PenaltyBreakComment: 0 +PenaltyBreakFirstLessLess: 0 +PenaltyBreakString: 70 +PenaltyExcessCharacter: 100 +PenaltyReturnTypeOnItsOwnLine: 90 PointerAlignment: Left -SpaceAfterCStyleCast: false +SpaceAfterCStyleCast: true SpaceBeforeAssignmentOperators: true -SpaceBeforeParens: Never +SpaceBeforeParens: ControlStatements SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 -SpacesInAngles: false +SpacesInAngles: Never SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false SpacesInParentheses: false @@ -110,11 +140,25 @@ SpacesInSquareBrackets: false #SpaceBeforeInheritanceColon: true #SortUsingDeclarations: true -SortIncludes: true +SortIncludes: CaseInsensitive -# Comments are for developers, they should arrange them -ReflowComments: false +ReflowComments: true #IncludeBlocks: Preserve #IndentPPDirectives: AfterHash + +StatementMacros: [ + 'THRUST_EXEC_CHECK_DISABLE', + 'THRUST_NAMESPACE_BEGIN', + 'THRUST_NAMESPACE_END', + 'THRUST_EXEC_CHECK_DISABLE', + 'CUB_NAMESPACE_BEGIN', + 'CUB_NAMESPACE_END', + 'THRUST_NAMESPACE_BEGIN', + 'THRUST_NAMESPACE_END', + '_LIBCUDACXX_BEGIN_NAMESPACE_STD', + '_LIBCUDACXX_END_NAMESPACE_STD', +] +TabWidth: 2 +UseTab: Never --- diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index 2e490e837..c654f6995 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -2134,7 +2134,7 @@ PREDEFINED = THRUST_DOXYGEN \ "THRUST_MR_DEFAULT_ALIGNMENT=alignof(std::max_align_t)" \ __cpp_lib_remove_cvref=201711 \ THRUST_PREVENT_MACRO_SUBSTITUTION= \ - __thrust_exec_check_disable__= \ + 
THRUST_EXEC_CHECK_DISABLE= \ # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/testing/shuffle.cu b/testing/shuffle.cu index 32210d530..1108f7802 100644 --- a/testing/shuffle.cu +++ b/testing/shuffle.cu @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/thrust/addressof.h b/thrust/addressof.h index e4aef753b..c1703426e 100644 --- a/thrust/addressof.h +++ b/thrust/addressof.h @@ -18,7 +18,7 @@ THRUST_NAMESPACE_BEGIN /*! Obtains the actual address of the object or function arg, even in presence of overloaded operator&. */ template -__host__ __device__ +THRUST_HOST_DEVICE T* addressof(T& arg) { return reinterpret_cast( diff --git a/thrust/adjacent_difference.h b/thrust/adjacent_difference.h index e8385c240..d3b7532d6 100644 --- a/thrust/adjacent_difference.h +++ b/thrust/adjacent_difference.h @@ -79,7 +79,7 @@ THRUST_NAMESPACE_BEGIN * \see inclusive_scan */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result); @@ -134,7 +134,7 @@ OutputIterator adjacent_difference(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, diff --git a/thrust/advance.h b/thrust/advance.h index a5162e203..3b3e85c55 100644 --- a/thrust/advance.h +++ b/thrust/advance.h @@ -60,7 +60,7 @@ THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/iterator/advance */ template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n); /*! \p next(i, n) returns the \p n th successor of the iterator \p i. 
@@ -91,7 +91,7 @@ void advance(InputIterator& i, Distance n); */ #if 0 // Doxygen only template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator next( InputIterator i , typename iterator_traits::difference_type n = 1 @@ -124,7 +124,7 @@ InputIterator next( */ #if 0 // Doxygen only template -__host__ __device__ +THRUST_HOST_DEVICE BidirectionalIterator prev( BidirectionalIterator i , typename iterator_traits::difference_type n = 1 diff --git a/thrust/allocate_unique.h b/thrust/allocate_unique.h index 334ac04cd..e58ca84dc 100644 --- a/thrust/allocate_unique.h +++ b/thrust/allocate_unique.h @@ -255,14 +255,14 @@ using uninitialized_array_allocator_delete template struct tagged_deleter : Lambda { - __host__ __device__ + THRUST_HOST_DEVICE tagged_deleter(Lambda&& l) : Lambda(THRUST_FWD(l)) {} using pointer = Pointer; }; template -__host__ __device__ +THRUST_HOST_DEVICE tagged_deleter make_tagged_deleter(Lambda&& l) { @@ -272,7 +272,7 @@ make_tagged_deleter(Lambda&& l) /////////////////////////////////////////////////////////////////////////////// template -__host__ +THRUST_HOST std::unique_ptr< T, allocator_delete< @@ -313,7 +313,7 @@ allocate_unique( } template -__host__ +THRUST_HOST std::unique_ptr< T, uninitialized_allocator_delete< @@ -353,7 +353,7 @@ uninitialized_allocate_unique( } template -__host__ +THRUST_HOST std::unique_ptr< T[], array_allocator_delete< @@ -396,7 +396,7 @@ allocate_unique_n( } template -__host__ +THRUST_HOST std::unique_ptr< T[], uninitialized_array_allocator_delete< diff --git a/thrust/async/copy.h b/thrust/async/copy.h index fa805c6fd..89632096e 100644 --- a/thrust/async/copy.h +++ b/thrust/async/copy.h @@ -47,7 +47,7 @@ template < typename FromPolicy, typename ToPolicy , typename ForwardIt, typename Sentinel, typename OutputIt > -__host__ +THRUST_HOST event async_copy( thrust::execution_policy& @@ -75,7 +75,7 @@ struct copy_fn final typename FromPolicy, typename ToPolicy , typename ForwardIt, typename Sentinel, typename OutputIt > - 
__host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& from_exec , thrust::detail::execution_policy_base const& to_exec @@ -96,7 +96,7 @@ struct copy_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename OutputIt > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -116,7 +116,7 @@ struct copy_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last, OutputIt&& output) THRUST_RETURNS( copy_fn::call( @@ -132,7 +132,7 @@ struct copy_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) diff --git a/thrust/async/for_each.h b/thrust/async/for_each.h index 0d3b3a189..5165ee896 100644 --- a/thrust/async/for_each.h +++ b/thrust/async/for_each.h @@ -47,7 +47,7 @@ template < typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename UnaryFunction > -__host__ +THRUST_HOST event async_for_each( thrust::execution_policy&, ForwardIt, Sentinel, UnaryFunction @@ -73,7 +73,7 @@ struct for_each_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename UnaryFunction > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -89,7 +89,7 @@ struct for_each_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last, UnaryFunction&& f) THRUST_RETURNS( for_each_fn::call( @@ -102,7 +102,7 @@ struct for_each_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) 
diff --git a/thrust/async/reduce.h b/thrust/async/reduce.h index 728ac7185..65b2cf57e 100644 --- a/thrust/async/reduce.h +++ b/thrust/async/reduce.h @@ -49,7 +49,7 @@ template < typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename T, typename BinaryOp > -__host__ +THRUST_HOST future async_reduce( thrust::execution_policy&, ForwardIt, Sentinel, T, BinaryOp @@ -75,7 +75,7 @@ struct reduce_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename T, typename BinaryOp > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -96,7 +96,7 @@ struct reduce_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename T > - __host__ + THRUST_HOST static auto call4( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -117,7 +117,7 @@ struct reduce_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel > - __host__ + THRUST_HOST static auto call3( thrust::detail::execution_policy_base const& exec @@ -139,7 +139,7 @@ struct reduce_fn final ) template - __host__ + THRUST_HOST static auto call4(ForwardIt&& first, Sentinel&& last, T&& init, BinaryOp&& op, @@ -156,7 +156,7 @@ struct reduce_fn final ) template - __host__ + THRUST_HOST static auto call3(ForwardIt&& first, Sentinel&& last, T&& init, thrust::false_type) @@ -175,7 +175,7 @@ struct reduce_fn final // if T1 is an execution_policy by using SFINAE. Switching to a static // dispatch pattern to prevent this. 
template - __host__ + THRUST_HOST static auto call(T1&& t1, T2&& t2, T3&& t3) THRUST_RETURNS( reduce_fn::call3(THRUST_FWD(t1), THRUST_FWD(t2), THRUST_FWD(t3), @@ -183,7 +183,7 @@ struct reduce_fn final ) template - __host__ + THRUST_HOST static auto call(T1&& t1, T2&& t2, T3&& t3, T4&& t4) THRUST_RETURNS( reduce_fn::call4(THRUST_FWD(t1), THRUST_FWD(t2), THRUST_FWD(t3), THRUST_FWD(t4), @@ -191,7 +191,7 @@ struct reduce_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last) THRUST_RETURNS( reduce_fn::call( @@ -209,7 +209,7 @@ struct reduce_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) @@ -230,7 +230,7 @@ template < , typename ForwardIt, typename Sentinel, typename OutputIt , typename T, typename BinaryOp > -__host__ +THRUST_HOST event async_reduce_into( thrust::execution_policy& @@ -258,7 +258,7 @@ struct reduce_into_fn final , typename ForwardIt, typename Sentinel, typename OutputIt , typename T, typename BinaryOp > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -282,7 +282,7 @@ struct reduce_into_fn final , typename ForwardIt, typename Sentinel, typename OutputIt , typename T > - __host__ + THRUST_HOST static auto call5( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -305,7 +305,7 @@ struct reduce_into_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename OutputIt > - __host__ + THRUST_HOST static auto call4( thrust::detail::execution_policy_base const& exec @@ -332,7 +332,7 @@ struct reduce_into_fn final typename ForwardIt, typename Sentinel, typename OutputIt , typename T, typename BinaryOp > - __host__ + THRUST_HOST static auto call5( ForwardIt&& first, Sentinel&& last , OutputIt&& output @@ -357,7 +357,7 @@ struct reduce_into_fn final typename ForwardIt, 
typename Sentinel, typename OutputIt , typename T > - __host__ + THRUST_HOST static auto call4( ForwardIt&& first, Sentinel&& last , OutputIt&& output @@ -380,7 +380,7 @@ struct reduce_into_fn final template < typename ForwardIt, typename Sentinel, typename OutputIt > - __host__ + THRUST_HOST static auto call( ForwardIt&& first, Sentinel&& last , OutputIt&& output @@ -406,7 +406,7 @@ struct reduce_into_fn final // if T1 is an execution_policy by using SFINAE. Switching to a static // dispatch pattern to prevent this. template - __host__ + THRUST_HOST static auto call(T1&& t1, T2&& t2, T3&& t3, T4&& t4) THRUST_RETURNS( reduce_into_fn::call4( @@ -415,7 +415,7 @@ struct reduce_into_fn final ) template - __host__ + THRUST_HOST static auto call(T1&& t1, T2&& t2, T3&& t3, T4&& t4, T5&& t5) THRUST_RETURNS( reduce_into_fn::call5( @@ -424,7 +424,7 @@ struct reduce_into_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) 
diff --git a/thrust/async/sort.h b/thrust/async/sort.h index 8cb69f550..435ae06c5 100644 --- a/thrust/async/sort.h +++ b/thrust/async/sort.h @@ -49,7 +49,7 @@ template < typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename StrictWeakOrdering > -__host__ +THRUST_HOST event async_stable_sort( thrust::execution_policy& @@ -76,7 +76,7 @@ struct stable_sort_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename StrictWeakOrdering > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -95,7 +95,7 @@ struct stable_sort_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -112,7 +112,7 @@ struct stable_sort_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last, StrictWeakOrdering&& comp) THRUST_RETURNS( stable_sort_fn::call( @@ -125,7 +125,7 @@ struct stable_sort_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last) THRUST_RETURNS( stable_sort_fn::call( @@ -137,7 +137,7 @@ struct stable_sort_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) 
@@ -155,7 +155,7 @@ template < typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename StrictWeakOrdering > -__host__ +THRUST_HOST event async_sort( thrust::execution_policy& exec @@ -181,7 +181,7 @@ struct sort_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel, typename StrictWeakOrdering > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -200,7 +200,7 @@ struct sort_fn final typename DerivedPolicy , typename ForwardIt, typename Sentinel > - __host__ + THRUST_HOST static auto call3( thrust::detail::execution_policy_base const& exec , ForwardIt&& first, Sentinel&& last @@ -217,7 +217,7 @@ struct sort_fn final ) template - __host__ + THRUST_HOST static auto call3(ForwardIt&& first, Sentinel&& last, StrictWeakOrdering&& comp, thrust::false_type) @@ -235,7 +235,7 @@ struct sort_fn final // if T1 is an execution_policy by using SFINAE. Switching to a static // dispatch pattern to prevent this. template - __host__ + THRUST_HOST static auto call(T1&& t1, T2&& t2, T3&& t3) THRUST_RETURNS( sort_fn::call3(THRUST_FWD(t1), THRUST_FWD(t2), THRUST_FWD(t3), @@ -243,7 +243,7 @@ struct sort_fn final ) template - __host__ + THRUST_HOST static auto call(ForwardIt&& first, Sentinel&& last) THRUST_RETURNS( sort_fn::call( @@ -258,7 +258,7 @@ struct sort_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) 
diff --git a/thrust/async/transform.h b/thrust/async/transform.h index 29b26f8b7..543e7ffd8 100644 --- a/thrust/async/transform.h +++ b/thrust/async/transform.h @@ -48,7 +48,7 @@ template < , typename ForwardIt, typename Sentinel, typename OutputIt , typename UnaryOperation > -__host__ +THRUST_HOST event async_transform( thrust::execution_policy& @@ -76,7 +76,7 @@ struct transform_fn final , typename ForwardIt, typename Sentinel, typename OutputIt , typename UnaryOperation > - __host__ + THRUST_HOST static auto call( thrust::detail::execution_policy_base const& exec @@ -98,7 +98,7 @@ struct transform_fn final typename ForwardIt, typename Sentinel, typename OutputIt , typename UnaryOperation > - __host__ + THRUST_HOST static auto call( ForwardIt&& first, Sentinel&& last , OutputIt&& output @@ -117,7 +117,7 @@ struct transform_fn final ) template - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST auto operator()(Args&&... args) const THRUST_RETURNS( call(THRUST_FWD(args)...) diff --git a/thrust/binary_search.h b/thrust/binary_search.h index 7a4746e0b..580bc65b2 100644 --- a/thrust/binary_search.h +++ b/thrust/binary_search.h @@ -98,7 +98,7 @@ THRUST_NAMESPACE_BEGIN * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -209,7 +209,7 @@ ForwardIterator lower_bound(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -323,7 +323,7 @@ ForwardIterator lower_bound(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -435,7 +435,7 @@ ForwardIterator upper_bound(ForwardIterator 
first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -547,7 +547,7 @@ ForwardIterator upper_bound(ForwardIterator first, * \see \p equal_range */ template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -657,7 +657,7 @@ bool binary_search(ForwardIterator first, * \see \p equal_range */ template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -782,7 +782,7 @@ bool binary_search(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -921,7 +921,7 @@ equal_range(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -1075,7 +1075,7 @@ equal_range(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1217,7 +1217,7 @@ OutputIterator lower_bound(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1363,7 +1363,7 @@ OutputIterator lower_bound(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ 
-1507,7 +1507,7 @@ OutputIterator upper_bound(ForwardIterator first, * \see \p binary_search */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1654,7 +1654,7 @@ OutputIterator upper_bound(ForwardIterator first, * \see \p equal_range */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1800,7 +1800,7 @@ OutputIterator binary_search(ForwardIterator first, * \see \p equal_range */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/complex.h b/thrust/complex.h index 07fed25cd..6d8425fa3 100644 --- a/thrust/complex.h +++ b/thrust/complex.h @@ -38,7 +38,7 @@ reinterpret_cast< \ const typename thrust::detail::remove_reference::type::value_type (&)[2] \ >(z)[1] -# define THRUST_STD_COMPLEX_DEVICE __device__ +# define THRUST_STD_COMPLEX_DEVICE THRUST_DEVICE #else # define THRUST_STD_COMPLEX_REAL(z) (z).real() # define THRUST_STD_COMPLEX_IMAG(z) (z).imag() @@ -168,7 +168,7 @@ struct complex * * \param re The real part of the number. */ - __host__ __device__ + THRUST_HOST_DEVICE complex(const T& re); /*! Construct a complex number from its real and imaginary parts. @@ -176,7 +176,7 @@ struct complex * \param re The real part of the number. * \param im The imaginary part of the number. */ - __host__ __device__ + THRUST_HOST_DEVICE complex(const T& re, const T& im); #if THRUST_CPP_DIALECT >= 2011 @@ -193,7 +193,7 @@ struct complex #else /*! Default construct a complex number. */ - __host__ __device__ + THRUST_HOST_DEVICE complex(); /*! This copy constructor copies from a \p complex with a type that is @@ -201,7 +201,7 @@ struct complex * * \param z The \p complex to copy from. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE complex(const complex& z); #endif @@ -213,7 +213,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex(const complex& z); /*! This converting copy constructor copies from a std::complex with @@ -221,7 +221,7 @@ struct complex * * \param z The \p complex to copy from. */ - __host__ THRUST_STD_COMPLEX_DEVICE + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); /*! This converting copy constructor copies from a std::complex with @@ -232,7 +232,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ THRUST_STD_COMPLEX_DEVICE + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); @@ -244,7 +244,7 @@ struct complex * * \param re The real part of the number. */ - __host__ __device__ + THRUST_HOST_DEVICE complex& operator=(const T& re); #if THRUST_CPP_DIALECT >= 2011 @@ -260,7 +260,7 @@ struct complex * * \param z The \p complex to copy from. */ - __host__ __device__ + THRUST_HOST_DEVICE complex& operator=(const complex& z); #endif @@ -272,7 +272,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator=(const complex& z); /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this @@ -280,7 +280,7 @@ struct complex * * \param z The \p complex to copy from. */ - __host__ THRUST_STD_COMPLEX_DEVICE + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this @@ -291,7 +291,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ THRUST_STD_COMPLEX_DEVICE + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); @@ -305,7 +305,7 @@ struct complex * \tparam U is convertible to \c value_type. 
*/ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator+=(const complex& z); /*! Subtracts a \p complex from this \p complex and assigns the result to @@ -316,7 +316,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator-=(const complex& z); /*! Multiplies this \p complex by another \p complex and assigns the result @@ -327,7 +327,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator*=(const complex& z); /*! Divides this \p complex by another \p complex and assigns the result to @@ -338,7 +338,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator/=(const complex& z); /*! Adds a scalar to this \p complex and assigns the result to this @@ -349,7 +349,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator+=(const U& z); /*! Subtracts a scalar from this \p complex and assigns the result to @@ -360,7 +360,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator-=(const U& z); /*! Multiplies this \p complex by a scalar and assigns the result @@ -371,7 +371,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator*=(const U& z); /*! Divides this \p complex by a scalar and assigns the result to @@ -382,7 +382,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ __device__ + THRUST_HOST_DEVICE complex& operator/=(const U& z); @@ -394,22 +394,22 @@ struct complex /*! Returns the real part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE T real() const volatile { return data.x; } /*! 
Returns the imaginary part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE T imag() const volatile { return data.y; } /*! Returns the real part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE T real() const { return data.x; } /*! Returns the imaginary part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE T imag() const { return data.y; } @@ -423,28 +423,28 @@ struct complex * * \param re The new real part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE void real(T re) volatile { data.x = re; } /*! Sets the imaginary part of this \p complex. * * \param im The new imaginary part of this \p complex.e */ - __host__ __device__ + THRUST_HOST_DEVICE void imag(T im) volatile { data.y = im; } /*! Sets the real part of this \p complex. * * \param re The new real part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE void real(T re) { data.x = re; } /*! Sets the imaginary part of this \p complex. * * \param im The new imaginary part of this \p complex. */ - __host__ __device__ + THRUST_HOST_DEVICE void imag(T im) { data.y = im; } @@ -453,7 +453,7 @@ struct complex /*! Casts this \p complex to a std::complex of the same type. */ - __host__ + THRUST_HOST operator std::complex() const { return std::complex(real(), imag()); } private: @@ -468,7 +468,7 @@ struct complex * \param z The \p complex from which to calculate the absolute value. */ template -__host__ __device__ +THRUST_HOST_DEVICE T abs(const complex& z); /*! Returns the phase angle (also known as argument) in radians of a \p complex. @@ -476,7 +476,7 @@ T abs(const complex& z); * \param z The \p complex from which to calculate the phase angle. */ template -__host__ __device__ +THRUST_HOST_DEVICE T arg(const complex& z); /*! Returns the square of the magnitude of a \p complex. @@ -484,7 +484,7 @@ T arg(const complex& z); * \param z The \p complex from which to calculate the norm. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE T norm(const complex& z); /*! Returns the complex conjugate of a \p complex. @@ -492,7 +492,7 @@ T norm(const complex& z); * \param z The \p complex from which to calculate the complex conjugate. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex conj(const complex& z); /*! Returns a \p complex with the specified magnitude and phase. @@ -501,7 +501,7 @@ complex conj(const complex& z); * \param theta The phase of the returned \p complex in radians. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> polar(const T0& m, const T1& theta = T1()); @@ -513,7 +513,7 @@ polar(const T0& m, const T1& theta = T1()); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex proj(const T& z); @@ -529,7 +529,7 @@ complex proj(const T& z); * \param y The second \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const complex& x, const complex& y); @@ -542,7 +542,7 @@ operator+(const complex& x, const complex& y); * \param y The scalar. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const complex& x, const T1& y); @@ -555,7 +555,7 @@ operator+(const complex& x, const T1& y); * \param y The \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const T0& x, const complex& y); @@ -568,7 +568,7 @@ operator+(const T0& x, const complex& y); * \param y The second \p complex (subtrahend). */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const complex& x, const complex& y); @@ -581,7 +581,7 @@ operator-(const complex& x, const complex& y); * \param y The scalar (subtrahend). */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const complex& x, const T1& y); @@ -594,7 +594,7 @@ operator-(const complex& x, const T1& y); * \param y The \p complex (subtrahend). 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const T0& x, const complex& y); @@ -607,7 +607,7 @@ operator-(const T0& x, const complex& y); * \param y The second \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const complex& x, const complex& y); @@ -617,7 +617,7 @@ operator*(const complex& x, const complex& y); * \param y The scalar. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const complex& x, const T1& y); @@ -630,7 +630,7 @@ operator*(const complex& x, const T1& y); * \param y The \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const T0& x, const complex& y); @@ -643,7 +643,7 @@ operator*(const T0& x, const complex& y); * \param y The denomimator (divisor). */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const complex& x, const complex& y); @@ -656,7 +656,7 @@ operator/(const complex& x, const complex& y); * \param y The scalar denomimator (divisor). */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const complex& x, const T1& y); @@ -669,7 +669,7 @@ operator/(const complex& x, const T1& y); * \param y The complex denomimator (divisor). */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const T0& x, const complex& y); @@ -682,7 +682,7 @@ operator/(const T0& x, const complex& y); * \param y The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex operator+(const complex& y); @@ -692,7 +692,7 @@ operator+(const complex& y); * \param y The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex operator-(const complex& y); @@ -705,7 +705,7 @@ operator-(const complex& y); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex exp(const complex& z); /*! Returns the complex natural logarithm of a \p complex number. 
@@ -713,7 +713,7 @@ complex exp(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex log(const complex& z); /*! Returns the complex base 10 logarithm of a \p complex number. @@ -721,7 +721,7 @@ complex log(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex log10(const complex& z); @@ -737,7 +737,7 @@ complex log10(const complex& z); * \param y The exponent. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const complex& x, const complex& y); @@ -750,7 +750,7 @@ pow(const complex& x, const complex& y); * \param y The exponent. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const complex& x, const T1& y); @@ -763,7 +763,7 @@ pow(const complex& x, const T1& y); * \param y The exponent. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const T0& x, const complex& y); @@ -772,7 +772,7 @@ pow(const T0& x, const complex& y); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex sqrt(const complex& z); @@ -783,7 +783,7 @@ complex sqrt(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex cos(const complex& z); /*! Returns the complex sine of a \p complex number. @@ -791,7 +791,7 @@ complex cos(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex sin(const complex& z); /*! Returns the complex tangent of a \p complex number. @@ -799,7 +799,7 @@ complex sin(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex tan(const complex& z); @@ -811,7 +811,7 @@ complex tan(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex cosh(const complex& z); /*! 
Returns the complex hyperbolic sine of a \p complex number. @@ -819,7 +819,7 @@ complex cosh(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex sinh(const complex& z); /*! Returns the complex hyperbolic tangent of a \p complex number. @@ -827,7 +827,7 @@ complex sinh(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex tanh(const complex& z); @@ -842,7 +842,7 @@ complex tanh(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex acos(const complex& z); /*! Returns the complex arc sine of a \p complex number. @@ -853,7 +853,7 @@ complex acos(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex asin(const complex& z); /*! Returns the complex arc tangent of a \p complex number. @@ -864,7 +864,7 @@ complex asin(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex atan(const complex& z); @@ -879,7 +879,7 @@ complex atan(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex acosh(const complex& z); /*! Returns the complex inverse hyperbolic sine of a \p complex number. @@ -890,7 +890,7 @@ complex acosh(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex asinh(const complex& z); /*! Returns the complex inverse hyperbolic tangent of a \p complex number. @@ -901,7 +901,7 @@ complex asinh(const complex& z); * \param z The \p complex argument. */ template -__host__ __device__ +THRUST_HOST_DEVICE complex atanh(const complex& z); @@ -930,7 +930,7 @@ operator<<(std::basic_ostream& os, const complex& z); * \param z The \p complex number to set. 
*/ template -__host__ +THRUST_HOST std::basic_istream& operator>>(std::basic_istream& is, complex& z); @@ -944,7 +944,7 @@ operator>>(std::basic_istream& is, complex& z); * \param y The second \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const complex& x, const complex& y); /*! Returns true if two \p complex numbers are equal and false otherwise. @@ -953,7 +953,7 @@ bool operator==(const complex& x, const complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are equal and false otherwise. @@ -962,7 +962,7 @@ bool operator==(const complex& x, const std::complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is zero and @@ -972,7 +972,7 @@ bool operator==(const std::complex& x, const complex& y); * \param y The \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const T0& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is zero and @@ -982,7 +982,7 @@ bool operator==(const T0& x, const complex& y); * \param y The scalar. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const complex& x, const T1& y); /*! Returns true if two \p complex numbers are different and false otherwise. @@ -991,7 +991,7 @@ bool operator==(const complex& x, const T1& y); * \param y The second \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const complex& x, const complex& y); /*! Returns true if two \p complex numbers are different and false otherwise. @@ -1000,7 +1000,7 @@ bool operator!=(const complex& x, const complex& y); * \param y The second \p complex. 
*/ template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are different and false otherwise. @@ -1009,7 +1009,7 @@ bool operator!=(const complex& x, const std::complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is not zero or @@ -1019,7 +1019,7 @@ bool operator!=(const std::complex& x, const complex& y); * \param y The \p complex. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const T0& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is not zero or @@ -1029,7 +1029,7 @@ bool operator!=(const T0& x, const complex& y); * \param y The scalar. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const complex& x, const T1& y); THRUST_NAMESPACE_END diff --git a/thrust/copy.h b/thrust/copy.h index de3fecf9e..7b8308748 100644 --- a/thrust/copy.h +++ b/thrust/copy.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -81,7 +81,7 @@ THRUST_NAMESPACE_BEGIN * \endcode */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -134,7 +134,7 @@ __host__ __device__ * \see thrust::copy */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, @@ -277,7 +277,7 @@ template * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; @@ -297,7 +297,7 @@ template * \see \c remove_copy_if */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -338,7 +338,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; @@ -406,7 +406,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -472,7 +472,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; diff --git a/thrust/count.h b/thrust/count.h index abf8b2d6c..0937f60bc 100644 --- a/thrust/count.h +++ b/thrust/count.h @@ -79,7 +79,7 @@ THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/algorithm/count */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value); @@ -147,7 +147,7 @@ template * ... 
* struct is_odd * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int &x) * { * return x & 1; @@ -170,7 +170,7 @@ template * \see https://en.cppreference.com/w/cpp/algorithm/count */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); @@ -195,7 +195,7 @@ __host__ __device__ * ... * struct is_odd * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int &x) * { * return x & 1; diff --git a/thrust/detail/adjacent_difference.inl b/thrust/detail/adjacent_difference.inl index 844687cff..f753519bb 100644 --- a/thrust/detail/adjacent_difference.inl +++ b/thrust/detail/adjacent_difference.inl @@ -23,9 +23,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result) @@ -36,9 +36,9 @@ OutputIterator adjacent_difference(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, diff --git a/thrust/detail/advance.inl b/thrust/detail/advance.inl index 7b5f261bd..7fd9a143e 100644 --- a/thrust/detail/advance.inl +++ b/thrust/detail/advance.inl @@ -29,14 +29,14 @@ THRUST_NAMESPACE_BEGIN __THRUST_DEFINE_HAS_NESTED_TYPE(has_difference_type, difference_type) template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n) { thrust::system::detail::generic::advance(i, n); } template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator next( InputIterator i , typename iterator_traits::difference_type n = 1 @@ -47,7 +47,7 @@ InputIterator next( } template -__host__ 
__device__ +THRUST_HOST_DEVICE BidirectionalIterator prev( BidirectionalIterator i , typename iterator_traits::difference_type n = 1 @@ -58,7 +58,7 @@ BidirectionalIterator prev( } template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::disable_if< has_difference_type >::value , BidirectionalIterator diff --git a/thrust/detail/alignment.h b/thrust/detail/alignment.h index c120a7454..f01f14f62 100644 --- a/thrust/detail/alignment.h +++ b/thrust/detail/alignment.h @@ -187,13 +187,13 @@ struct aligned_type; /// \p aligned_reinterpret_cast is responsible for ensuring that the alignment /// requirements are actually satisified. template -__host__ __device__ +THRUST_HOST_DEVICE T aligned_reinterpret_cast(U u) { return reinterpret_cast(reinterpret_cast(u)); } -__host__ __device__ +THRUST_HOST_DEVICE inline std::size_t aligned_storage_size(std::size_t n, std::size_t align) { return ((n + align - 1) / align) * align; diff --git a/thrust/detail/allocator/allocator_traits.h b/thrust/detail/allocator/allocator_traits.h index 4a51ebd85..6e7c4a6da 100644 --- a/thrust/detail/allocator/allocator_traits.h +++ b/thrust/detail/allocator/allocator_traits.h @@ -371,32 +371,32 @@ template typedef typename thrust::detail::pointer_traits::reference reference; typedef typename thrust::detail::pointer_traits::reference const_reference; - inline __host__ __device__ + inline THRUST_HOST_DEVICE static pointer allocate(allocator_type &a, size_type n); - inline __host__ __device__ + inline THRUST_HOST_DEVICE static pointer allocate(allocator_type &a, size_type n, const_void_pointer hint); - inline __host__ __device__ + inline THRUST_HOST_DEVICE static void deallocate(allocator_type &a, pointer p, size_type n); // XXX should probably change T* to pointer below and then relax later template - inline __host__ __device__ static void construct(allocator_type &a, T *p); + inline THRUST_HOST_DEVICE static void construct(allocator_type &a, T *p); template - inline __host__ __device__ static 
void construct(allocator_type &a, T *p, const Arg1 &arg1); + inline THRUST_HOST_DEVICE static void construct(allocator_type &a, T *p, const Arg1 &arg1); #if THRUST_CPP_DIALECT >= 2011 template - inline __host__ __device__ static void construct(allocator_type &a, T *p, Args&&... args); + inline THRUST_HOST_DEVICE static void construct(allocator_type &a, T *p, Args&&... args); #endif template - inline __host__ __device__ static void destroy(allocator_type &a, T *p); + inline THRUST_HOST_DEVICE static void destroy(allocator_type &a, T *p); - inline __host__ __device__ + inline THRUST_HOST_DEVICE static size_type max_size(const allocator_type &a); }; // end allocator_traits @@ -430,7 +430,7 @@ template identity_ // else get() needs to return a value >::type get_result_type; - __host__ __device__ + THRUST_HOST_DEVICE inline static get_result_type get(Alloc &a); }; diff --git a/thrust/detail/allocator/allocator_traits.inl b/thrust/detail/allocator/allocator_traits.inl index 34f1bc9be..0ca61b234 100644 --- a/thrust/detail/allocator/allocator_traits.inl +++ b/thrust/detail/allocator/allocator_traits.inl @@ -81,45 +81,45 @@ public: template using rebind_traits = allocator_traits>; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE static pointer allocate(allocator_type &a, size_type n) { return superclass::allocate(a, n); } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE static pointer allocate(allocator_type &a, size_type n, const_void_pointer hint) { return superclass::allocate(a, n, hint); } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE static void deallocate(allocator_type &a, pointer p, size_type n) { superclass::deallocate(a, p, n); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE static void construct(allocator_type &a, U *p, Args&&... 
args) { superclass::construct(a, p, THRUST_FWD(args)...); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE static void destroy(allocator_type &a, U *p) { superclass::destroy(a, p); } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE static size_type max_size(const allocator_type &a) { return superclass::max_size(a); @@ -146,7 +146,7 @@ template }; template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< has_member_allocate_with_hint::value, typename allocator_traits::pointer @@ -157,7 +157,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< has_member_allocate_with_hint::value, typename allocator_traits::pointer @@ -175,9 +175,9 @@ template : has_member_construct1_impl {}; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if< has_member_construct1::value >::type @@ -186,9 +186,9 @@ template a.construct(p); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename disable_if< has_member_construct1::value >::type @@ -205,9 +205,9 @@ template : has_member_construct2_impl {}; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if< has_member_construct2::value >::type @@ -216,9 +216,9 @@ template a.construct(p,arg1); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename disable_if< has_member_construct2::value >::type @@ -236,9 +236,9 @@ template : has_member_constructN_impl {}; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if< has_member_constructN::value >::type @@ -247,9 
+247,9 @@ template a.construct(p, THRUST_FWD(args)...); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename disable_if< has_member_constructN::value >::type @@ -267,9 +267,9 @@ template : has_member_destroy_impl {}; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if< has_member_destroy::value >::type @@ -278,9 +278,9 @@ template a.destroy(p); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename disable_if< has_member_destroy::value >::type @@ -303,7 +303,7 @@ template }; template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< has_member_max_size::value, typename allocator_traits::size_type @@ -314,7 +314,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< has_member_max_size::value, typename allocator_traits::size_type @@ -326,7 +326,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< has_member_system::value, typename allocator_system::type & @@ -338,7 +338,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< has_member_system::value, typename allocator_system::type @@ -354,15 +354,15 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename allocator_traits::pointer allocator_traits ::allocate(Alloc &a, typename allocator_traits::size_type n) { struct workaround_warnings { - __thrust_exec_check_disable__ - static __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + static THRUST_HOST_DEVICE typename allocator_traits::pointer allocate(Alloc &a, typename allocator_traits::size_type n) { @@ -374,7 +374,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename allocator_traits::pointer allocator_traits ::allocate(Alloc 
&a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer hint) @@ -383,14 +383,14 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE void allocator_traits ::deallocate(Alloc &a, typename allocator_traits::pointer p, typename allocator_traits::size_type n) { struct workaround_warnings { - __thrust_exec_check_disable__ - static __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + static THRUST_HOST_DEVICE void deallocate(Alloc &a, typename allocator_traits::pointer p, typename allocator_traits::size_type n) { return a.deallocate(p,n); @@ -402,7 +402,7 @@ __host__ __device__ template template - __host__ __device__ + THRUST_HOST_DEVICE void allocator_traits ::construct(allocator_type &a, T *p) { @@ -411,7 +411,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE void allocator_traits ::construct(allocator_type &a, T *p, const Arg1 &arg1) { @@ -422,7 +422,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE void allocator_traits ::construct(allocator_type &a, T *p, Args&&... 
args) { @@ -433,7 +433,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE void allocator_traits ::destroy(allocator_type &a, T *p) { @@ -441,7 +441,7 @@ template } template -__host__ __device__ +THRUST_HOST_DEVICE typename allocator_traits::size_type allocator_traits ::max_size(const allocator_type &a) @@ -450,7 +450,7 @@ __host__ __device__ } template -__host__ __device__ +THRUST_HOST_DEVICE typename allocator_system::get_result_type allocator_system ::get(Alloc &a) diff --git a/thrust/detail/allocator/copy_construct_range.h b/thrust/detail/allocator/copy_construct_range.h index b3c2de324..56d38679b 100644 --- a/thrust/detail/allocator/copy_construct_range.h +++ b/thrust/detail/allocator/copy_construct_range.h @@ -24,7 +24,7 @@ namespace detail { template -__host__ __device__ +THRUST_HOST_DEVICE Pointer copy_construct_range(thrust::execution_policy &from_system, Allocator &a, InputIterator first, @@ -32,7 +32,7 @@ __host__ __device__ Pointer result); template -__host__ __device__ +THRUST_HOST_DEVICE Pointer copy_construct_range_n(thrust::execution_policy &from_system, Allocator &a, InputIterator first, diff --git a/thrust/detail/allocator/copy_construct_range.inl b/thrust/detail/allocator/copy_construct_range.inl index d8c3cb7b2..38de7882c 100644 --- a/thrust/detail/allocator/copy_construct_range.inl +++ b/thrust/detail/allocator/copy_construct_range.inl @@ -40,13 +40,13 @@ template { Allocator &a; - __host__ __device__ + THRUST_HOST_DEVICE copy_construct_with_allocator(Allocator &a) : a(a) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(Tuple t) { const InputType &in = thrust::get<0>(t); @@ -86,7 +86,7 @@ template // perhaps generic::uninitialized_copy could call this routine // with a default allocator template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if_convertible< FromSystem, ToSystem, @@ -128,7 +128,7 @@ __host__ __device__ // perhaps generic::uninitialized_copy_n could call this 
routine // with a default allocator template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if_convertible< FromSystem, ToSystem, @@ -161,7 +161,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if_convertible< FromSystem, ToSystem, @@ -181,7 +181,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if_convertible< FromSystem, ToSystem, @@ -201,7 +201,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< needs_copy_construct_via_allocator< Allocator, @@ -221,7 +221,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< needs_copy_construct_via_allocator< Allocator, @@ -241,7 +241,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< needs_copy_construct_via_allocator< Allocator, @@ -260,7 +260,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< needs_copy_construct_via_allocator< Allocator, @@ -282,7 +282,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE Pointer copy_construct_range(thrust::execution_policy &from_system, Allocator &a, InputIterator first, @@ -294,7 +294,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE Pointer copy_construct_range_n(thrust::execution_policy &from_system, Allocator &a, InputIterator first, diff --git a/thrust/detail/allocator/default_construct_range.h b/thrust/detail/allocator/default_construct_range.h index 8b5026c05..213a1c6eb 100644 --- a/thrust/detail/allocator/default_construct_range.h +++ b/thrust/detail/allocator/default_construct_range.h @@ -24,7 +24,7 @@ namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline void default_construct_range(Allocator &a, Pointer p, Size n); diff --git a/thrust/detail/allocator/default_construct_range.inl b/thrust/detail/allocator/default_construct_range.inl index 
6d26578fa..e34f34fff 100644 --- a/thrust/detail/allocator/default_construct_range.inl +++ b/thrust/detail/allocator/default_construct_range.inl @@ -35,13 +35,13 @@ template { Allocator &a; - __host__ __device__ + THRUST_HOST_DEVICE construct1_via_allocator(Allocator &a) : a(a) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(T &x) { allocator_traits::construct(a, &x); @@ -69,7 +69,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< needs_default_construct_via_allocator< Allocator, @@ -83,7 +83,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< needs_default_construct_via_allocator< Allocator, @@ -100,7 +100,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void default_construct_range(Allocator &a, Pointer p, Size n) { return allocator_traits_detail::default_construct_range(a,p,n); diff --git a/thrust/detail/allocator/destroy_range.h b/thrust/detail/allocator/destroy_range.h index cfc7e3f6e..403663d88 100644 --- a/thrust/detail/allocator/destroy_range.h +++ b/thrust/detail/allocator/destroy_range.h @@ -23,7 +23,7 @@ namespace detail { template -__host__ __device__ +THRUST_HOST_DEVICE inline void destroy_range(Allocator &a, Pointer p, Size n); } // end detail diff --git a/thrust/detail/allocator/destroy_range.inl b/thrust/detail/allocator/destroy_range.inl index 26107acc5..5c7ef3cd3 100644 --- a/thrust/detail/allocator/destroy_range.inl +++ b/thrust/detail/allocator/destroy_range.inl @@ -95,13 +95,13 @@ template { Allocator &a; - __host__ __device__ + THRUST_HOST_DEVICE destroy_via_allocator(Allocator &a) : a(a) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(T &x) { allocator_traits::destroy(a, &x); @@ -111,7 +111,7 @@ template // destroy_range case 1: destroy via allocator template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if_destroy_range_case1::type destroy_range(Allocator 
&a, Pointer p, Size n) { @@ -122,9 +122,9 @@ __host__ __device__ // we must prepare for His coming struct gozer { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(T &x) { x.~T(); @@ -133,7 +133,7 @@ struct gozer // destroy_range case 2: destroy without the allocator template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if_destroy_range_case2::type destroy_range(Allocator &a, Pointer p, Size n) { @@ -143,7 +143,7 @@ __host__ __device__ // destroy_range case 3: no-op template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if_destroy_range_case3::type destroy_range(Allocator &, Pointer, Size) { @@ -155,7 +155,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void destroy_range(Allocator &a, Pointer p, Size n) { return allocator_traits_detail::destroy_range(a,p,n); diff --git a/thrust/detail/allocator/fill_construct_range.h b/thrust/detail/allocator/fill_construct_range.h index a7572cb2d..f8275a077 100644 --- a/thrust/detail/allocator/fill_construct_range.h +++ b/thrust/detail/allocator/fill_construct_range.h @@ -24,7 +24,7 @@ namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline void fill_construct_range(Allocator &a, Pointer p, Size n, const T &value); diff --git a/thrust/detail/allocator/fill_construct_range.inl b/thrust/detail/allocator/fill_construct_range.inl index 5fd7d476f..92f524fd2 100644 --- a/thrust/detail/allocator/fill_construct_range.inl +++ b/thrust/detail/allocator/fill_construct_range.inl @@ -54,13 +54,13 @@ template Allocator &a; Arg1 arg; - __host__ __device__ + THRUST_HOST_DEVICE construct2_via_allocator(Allocator &a, const Arg1 &arg) : a(a), arg(arg) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(T &x) { allocator_traits::construct(a, &x, arg); @@ -69,7 +69,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE typename enable_if< 
has_effectful_member_construct2< Allocator, @@ -84,7 +84,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename disable_if< has_effectful_member_construct2< Allocator, @@ -102,7 +102,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void fill_construct_range(Alloc &a, Pointer p, Size n, const T &value) { return allocator_traits_detail::fill_construct_range(a,p,n,value); diff --git a/thrust/detail/allocator/no_throw_allocator.h b/thrust/detail/allocator/no_throw_allocator.h index bcd2aa049..cf54e9f9f 100644 --- a/thrust/detail/allocator/no_throw_allocator.h +++ b/thrust/detail/allocator/no_throw_allocator.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019-2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,7 +32,7 @@ template typedef BaseAllocator super_t; public: - inline __host__ __device__ + inline THRUST_HOST_DEVICE no_throw_allocator(const BaseAllocator &other = BaseAllocator()) : super_t(other) {} @@ -43,7 +43,7 @@ template typedef no_throw_allocator::other> other; }; // end rebind - __host__ __device__ + THRUST_HOST_DEVICE void deallocate(typename super_t::pointer p, typename super_t::size_type n) { NV_IF_TARGET(NV_IS_HOST, ( @@ -60,10 +60,10 @@ template )); } // end deallocate() - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator==(no_throw_allocator const &other) { return super_t::operator==(other); } - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator!=(no_throw_allocator const &other) { return super_t::operator!=(other); } }; // end no_throw_allocator diff --git a/thrust/detail/allocator/tagged_allocator.h b/thrust/detail/allocator/tagged_allocator.h index 804c4e42e..9ba9e22e6 100644 --- a/thrust/detail/allocator/tagged_allocator.h +++ b/thrust/detail/allocator/tagged_allocator.h @@ -63,34 +63,34 @@ template typedef tagged_allocator other; }; // end rebind - __host__ __device__ + THRUST_HOST_DEVICE inline tagged_allocator(); - __host__ __device__ + THRUST_HOST_DEVICE inline tagged_allocator(const tagged_allocator &); template - __host__ __device__ + THRUST_HOST_DEVICE inline tagged_allocator(const tagged_allocator &); - __host__ __device__ + THRUST_HOST_DEVICE inline ~tagged_allocator(); - __host__ __device__ + THRUST_HOST_DEVICE pointer address(reference x) const; - __host__ __device__ + THRUST_HOST_DEVICE const_pointer address(const_reference x) const; size_type max_size() const; }; template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const tagged_allocator &, const tagged_allocator &); template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const tagged_allocator &, const tagged_allocator &); } // end detail diff --git a/thrust/detail/allocator/tagged_allocator.inl 
b/thrust/detail/allocator/tagged_allocator.inl index bcd534cbc..4ac54e17e 100644 --- a/thrust/detail/allocator/tagged_allocator.inl +++ b/thrust/detail/allocator/tagged_allocator.inl @@ -26,14 +26,14 @@ namespace detail template - __host__ __device__ + THRUST_HOST_DEVICE tagged_allocator ::tagged_allocator() {} template - __host__ __device__ + THRUST_HOST_DEVICE tagged_allocator ::tagged_allocator(const tagged_allocator &) {} @@ -41,21 +41,21 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE tagged_allocator ::tagged_allocator(const tagged_allocator &) {} template - __host__ __device__ + THRUST_HOST_DEVICE tagged_allocator ::~tagged_allocator() {} template - __host__ __device__ + THRUST_HOST_DEVICE typename tagged_allocator::pointer tagged_allocator ::address(reference x) const @@ -65,7 +65,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename tagged_allocator::const_pointer tagged_allocator ::address(const_reference x) const @@ -84,7 +84,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const tagged_allocator &, const tagged_allocator &) { return true; @@ -92,7 +92,7 @@ bool operator==(const tagged_allocator &, const tagged_allocato template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const tagged_allocator &, const tagged_allocator &) { return false; diff --git a/thrust/detail/allocator/temporary_allocator.h b/thrust/detail/allocator/temporary_allocator.h index c8ef60625..388c0fa39 100644 --- a/thrust/detail/allocator/temporary_allocator.h +++ b/thrust/detail/allocator/temporary_allocator.h @@ -48,25 +48,25 @@ template typedef typename super_t::pointer pointer; typedef typename super_t::size_type size_type; - inline __host__ __device__ + inline THRUST_HOST_DEVICE temporary_allocator(const temporary_allocator &other) : super_t(), m_system(other.m_system) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE explicit temporary_allocator(thrust::execution_policy &system) : 
super_t(), m_system(thrust::detail::derived_cast(system)) {} - __host__ __device__ + THRUST_HOST_DEVICE pointer allocate(size_type cnt); - __host__ __device__ + THRUST_HOST_DEVICE void deallocate(pointer p, size_type n); - __host__ __device__ + THRUST_HOST_DEVICE inline System &system() { return m_system; diff --git a/thrust/detail/allocator/temporary_allocator.inl b/thrust/detail/allocator/temporary_allocator.inl index 34c9788b4..f813107c8 100644 --- a/thrust/detail/allocator/temporary_allocator.inl +++ b/thrust/detail/allocator/temporary_allocator.inl @@ -40,7 +40,7 @@ namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE typename temporary_allocator::pointer temporary_allocator ::allocate(typename temporary_allocator::size_type cnt) @@ -76,7 +76,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void temporary_allocator ::deallocate(typename temporary_allocator::pointer p, typename temporary_allocator::size_type n) { diff --git a/thrust/detail/binary_search.inl b/thrust/detail/binary_search.inl index 90350ced4..e9c37aacb 100644 --- a/thrust/detail/binary_search.inl +++ b/thrust/detail/binary_search.inl @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -38,9 +38,9 @@ ForwardIterator lower_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -52,9 +52,9 @@ ForwardIterator lower_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -65,9 +65,9 @@ ForwardIterator upper_bound(const 
thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -79,9 +79,9 @@ ForwardIterator upper_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -92,9 +92,9 @@ bool binary_search(const thrust::detail::execution_policy_base &e } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -106,9 +106,9 @@ bool binary_search(const thrust::detail::execution_policy_base &e } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -121,9 +121,9 @@ equal_range(const thrust::detail::execution_policy_base &exec, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -135,9 +135,9 @@ equal_range(const thrust::detail::execution_policy_base &exec, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -150,9 +150,9 @@ OutputIterator lower_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -166,9 +166,9 @@ OutputIterator lower_bound(const 
thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -181,9 +181,9 @@ OutputIterator upper_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -197,9 +197,9 @@ OutputIterator upper_bound(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -212,9 +212,9 @@ OutputIterator binary_search(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/detail/complex/arithmetic.h b/thrust/detail/complex/arithmetic.h index dab294eed..2a8e43d2c 100644 --- a/thrust/detail/complex/arithmetic.h +++ b/thrust/detail/complex/arithmetic.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2021 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ THRUST_NAMESPACE_BEGIN /* --- Binary Arithmetic Operators --- */ template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const complex& x, const complex& y) { @@ -39,7 +39,7 @@ operator+(const complex& x, const complex& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const complex& x, const T1& y) { @@ -48,7 +48,7 @@ operator+(const complex& x, const T1& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator+(const T0& x, const complex& y) { @@ -58,7 +58,7 @@ operator+(const T0& x, const complex& y) template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const complex& x, const complex& y) { @@ -67,7 +67,7 @@ operator-(const complex& x, const complex& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const complex& x, const T1& y) { @@ -76,7 +76,7 @@ operator-(const complex& x, const T1& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator-(const T0& x, const complex& y) { @@ -86,7 +86,7 @@ operator-(const T0& x, const complex& y) template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const complex& x, const complex& y) { @@ -96,7 +96,7 @@ operator*(const complex& x, const complex& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const complex& x, const T1& y) { @@ -105,7 +105,7 @@ operator*(const complex& x, const T1& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator*(const T0& x, const complex& y) { @@ -115,7 +115,7 @@ operator*(const T0& x, const complex& y) template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const complex& x, const complex& y) { @@ -143,7 +143,7 @@ operator/(const complex& x, const complex& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const complex& x, const T1& y) { @@ -152,7 +152,7 @@ operator/(const complex& x, const T1& y) } template 
-__host__ __device__ +THRUST_HOST_DEVICE complex::type> operator/(const T0& x, const complex& y) { @@ -165,14 +165,14 @@ operator/(const T0& x, const complex& y) /* --- Unary Arithmetic Operators --- */ template -__host__ __device__ +THRUST_HOST_DEVICE complex operator+(const complex& y) { return y; } template -__host__ __device__ +THRUST_HOST_DEVICE complex operator-(const complex& y) { return y * -T(1); @@ -183,7 +183,7 @@ complex operator-(const complex& y) // As std::hypot is only C++11 we have to use the C interface template -__host__ __device__ +THRUST_HOST_DEVICE T abs(const complex& z) { return hypot(z.real(), z.imag()); @@ -193,13 +193,13 @@ T abs(const complex& z) namespace detail { namespace complex { -__host__ __device__ +THRUST_HOST_DEVICE inline float abs(const thrust::complex& z) { return hypotf(z.real(),z.imag()); } -__host__ __device__ +THRUST_HOST_DEVICE inline double abs(const thrust::complex& z) { return hypot(z.real(),z.imag()); @@ -209,14 +209,14 @@ inline double abs(const thrust::complex& z) } // end namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline float abs(const complex& z) { return detail::complex::abs(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline double abs(const complex& z) { return detail::complex::abs(z); @@ -224,7 +224,7 @@ inline double abs(const complex& z) template -__host__ __device__ +THRUST_HOST_DEVICE T arg(const complex& z) { // Find `atan2` by ADL. @@ -238,7 +238,7 @@ T arg(const complex& z) template -__host__ __device__ +THRUST_HOST_DEVICE complex conj(const complex& z) { return complex(z.real(), -z.imag()); @@ -246,7 +246,7 @@ complex conj(const complex& z) template -__host__ __device__ +THRUST_HOST_DEVICE T norm(const complex& z) { return z.real() * z.real() + z.imag() * z.imag(); @@ -254,7 +254,7 @@ T norm(const complex& z) // XXX Why specialize these, we could just rely on ADL. 
template <> -__host__ __device__ +THRUST_HOST_DEVICE inline float norm(const complex& z) { // Find `abs` and `sqrt` by ADL. @@ -276,7 +276,7 @@ inline float norm(const complex& z) } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline double norm(const complex& z) { // Find `abs` and `sqrt` by ADL. @@ -300,7 +300,7 @@ inline double norm(const complex& z) template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> polar(const T0& m, const T1& theta) { diff --git a/thrust/detail/complex/c99math.h b/thrust/detail/complex/c99math.h index f4fbfe9ed..66c6cc9a6 100644 --- a/thrust/detail/complex/c99math.h +++ b/thrust/detail/complex/c99math.h @@ -49,10 +49,10 @@ using ::cosh; using ::atan; template -inline __host__ __device__ T infinity(); +inline THRUST_HOST_DEVICE T infinity(); template <> -inline __host__ __device__ float infinity() +inline THRUST_HOST_DEVICE float infinity() { float res; set_float_word(res, 0x7f800000); @@ -61,7 +61,7 @@ inline __host__ __device__ float infinity() template <> -inline __host__ __device__ double infinity() +inline THRUST_HOST_DEVICE double infinity() { double res; insert_words(res, 0x7ff00000,0); @@ -87,35 +87,35 @@ inline __host__ __device__ double infinity() #endif // HIP compiler #if defined _MSC_VER -__host__ __device__ inline int isinf(float x){ +THRUST_HOST_DEVICE inline int isinf(float x){ return std::abs(x) == infinity(); } -__host__ __device__ inline int isinf(double x){ +THRUST_HOST_DEVICE inline int isinf(double x){ return std::abs(x) == infinity(); } -__host__ __device__ inline int isnan(float x){ +THRUST_HOST_DEVICE inline int isnan(float x){ return x != x; } -__host__ __device__ inline int isnan(double x){ +THRUST_HOST_DEVICE inline int isnan(double x){ return x != x; } -__host__ __device__ inline int signbit(float x){ +THRUST_HOST_DEVICE inline int signbit(float x){ return ((*((uint32_t *)&x)) & 0x80000000) != 0 ? 
1 : 0; } -__host__ __device__ inline int signbit(double x){ +THRUST_HOST_DEVICE inline int signbit(double x){ return ((*((uint64_t *)&x)) & 0x8000000000000000) != 0ull ? 1 : 0; } -__host__ __device__ inline int isfinite(float x){ +THRUST_HOST_DEVICE inline int isfinite(float x){ return !isnan(x) && !isinf(x); } -__host__ __device__ inline int isfinite(double x){ +THRUST_HOST_DEVICE inline int isfinite(double x){ return !isnan(x) && !isinf(x); } @@ -155,7 +155,7 @@ using ::atanh; #if defined _MSC_VER -__host__ __device__ inline double copysign(double x, double y){ +THRUST_HOST_DEVICE inline double copysign(double x, double y){ uint32_t hx,hy; get_high_word(hx,x); get_high_word(hy,y); @@ -163,7 +163,7 @@ __host__ __device__ inline double copysign(double x, double y){ return x; } -__host__ __device__ inline float copysignf(float x, float y){ +THRUST_HOST_DEVICE inline float copysignf(float x, float y){ uint32_t ix,iy; get_float_word(ix,x); get_float_word(iy,y); @@ -176,7 +176,7 @@ __host__ __device__ inline float copysignf(float x, float y){ #if !defined(__CUDACC__) && !defined(_NVHPC_CUDA) // Simple approximation to log1p as Visual Studio is lacking one -__host__ __device__ inline double log1p(double x){ +THRUST_HOST_DEVICE inline double log1p(double x){ double u = 1.0+x; if(u == 1.0){ return x; @@ -190,7 +190,7 @@ __host__ __device__ inline double log1p(double x){ } } -__host__ __device__ inline float log1pf(float x){ +THRUST_HOST_DEVICE inline float log1pf(float x){ float u = 1.0f+x; if(u == 1.0f){ return x; diff --git a/thrust/detail/complex/catrig.h b/thrust/detail/complex/catrig.h index 7db9f9f47..e51089cb9 100644 --- a/thrust/detail/complex/catrig.h +++ b/thrust/detail/complex/catrig.h @@ -60,7 +60,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ +THRUST_HOST_DEVICE inline void raise_inexact(){ const volatile float tiny = 7.888609052210118054117286e-31; /* 0x1p-100; */ // needs the volatile to prevent compiler from ignoring it @@ -68,7 +68,7 
@@ inline void raise_inexact(){ (void)junk; } -__host__ __device__ inline complex clog_for_large_values(complex z); +THRUST_HOST_DEVICE inline complex clog_for_large_values(complex z); /* * Testing indicates that all these functions are accurate up to 4 ULP. @@ -137,7 +137,7 @@ __host__ __device__ inline complex clog_for_large_values(complex * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. * Pass hypot(a, b) as the third argument. */ -__host__ __device__ +THRUST_HOST_DEVICE inline double f(double a, double b, double hypot_a_b) { @@ -158,7 +158,7 @@ f(double a, double b, double hypot_a_b) * If returning sqrt_A2my2 has potential to result in an underflow, it is * rescaled, and new_y is similarly rescaled. */ -__host__ __device__ +THRUST_HOST_DEVICE inline void do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, double *sqrt_A2my2, double *new_y) @@ -284,7 +284,7 @@ do_hard_work(double x, double y, double *rx, int *B_is_usable, double *B, * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) * as z -> infinity, uniformly in y */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex casinh(complex z) { double x, y, ax, ay, rx, ry, B, sqrt_A2my2, new_y; @@ -347,7 +347,7 @@ complex casinh(complex z) * casin(z) = reverse(casinh(reverse(z))) * where reverse(x + I*y) = y + I*x = I*conj(z). */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex casin(complex z) { complex w = casinh(complex(z.imag(), z.real())); @@ -367,7 +367,7 @@ complex casin(complex z) * Re(cacos(z)) = atan2(fabs(y), x) + O(y/z^3) * as z -> infinity, uniformly in y */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cacos(complex z) { double x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; @@ -446,7 +446,7 @@ complex cacos(complex z) * cacosh(z) = I*cacos(z) or -I*cacos(z) * where the sign is chosen so Re(cacosh(z)) >= 0. 
*/ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cacosh(complex z) { complex w; @@ -471,7 +471,7 @@ complex cacosh(complex z) /* * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex clog_for_large_values(complex z) { double x, y; @@ -523,7 +523,7 @@ complex clog_for_large_values(complex z) * Assumes y is non-negative. * Assumes fabs(x) >= DBL_EPSILON. */ -__host__ __device__ +THRUST_HOST_DEVICE inline double sum_squares(double x, double y) { const double SQRT_MIN = 1.491668146240041348658193e-154; /* = 0x1p-511; >= sqrt(DBL_MIN) */ @@ -543,7 +543,7 @@ inline double sum_squares(double x, double y) * This is only called in a context where inexact is always raised before * the call, so no effort is made to avoid or force inexact. */ -__host__ __device__ +THRUST_HOST_DEVICE inline double real_part_reciprocal(double x, double y) { double scale; @@ -590,7 +590,7 @@ inline double real_part_reciprocal(double x, double y) * as z -> infinity, uniformly in x */ #if (THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC) && !defined(__clang__) -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex catanh(complex z) { double x, y, ax, ay, rx, ry; @@ -663,7 +663,7 @@ complex catanh(complex z) * catan(z) = reverse(catanh(reverse(z))) * where reverse(x + I*y) = y + I*x = I*conj(z). 
*/ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complexcatan(complex z) { complex w = catanh(complex(z.imag(), z.real())); @@ -678,7 +678,7 @@ complexcatan(complex z) template -__host__ __device__ +THRUST_HOST_DEVICE inline complex acos(const complex& z){ const complex ret = thrust::asin(z); const ValueType pi = ValueType(3.14159265358979323846); @@ -687,14 +687,14 @@ inline complex acos(const complex& z){ template -__host__ __device__ +THRUST_HOST_DEVICE inline complex asin(const complex& z){ const complex i(0,1); return -i*asinh(i*z); } template -__host__ __device__ +THRUST_HOST_DEVICE inline complex atan(const complex& z){ const complex i(0,1); return -i*thrust::atanh(i*z); @@ -702,7 +702,7 @@ inline complex atan(const complex& z){ template -__host__ __device__ +THRUST_HOST_DEVICE inline complex acosh(const complex& z){ thrust::complex ret((z.real() - z.imag()) * (z.real() + z.imag()) - ValueType(1.0), ValueType(2.0) * z.real() * z.imag()); @@ -719,13 +719,13 @@ inline complex acosh(const complex& z){ } template -__host__ __device__ +THRUST_HOST_DEVICE inline complex asinh(const complex& z){ return thrust::log(thrust::sqrt(z*z+ValueType(1))+z); } template -__host__ __device__ +THRUST_HOST_DEVICE inline complex atanh(const complex& z){ ValueType imag2 = z.imag() * z.imag(); ValueType n = ValueType(1.0) + z.real(); @@ -742,41 +742,41 @@ inline complex atanh(const complex& z){ } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex acos(const complex& z){ return detail::complex::cacos(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex asin(const complex& z){ return detail::complex::casin(z); } #if( __cplusplus >= 201103L || !defined _MSC_VER) && !defined(__clang__) template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex atan(const complex& z){ return detail::complex::catan(z); } #endif template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex acosh(const complex& z){ return 
detail::complex::cacosh(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex asinh(const complex& z){ return detail::complex::casinh(z); } #if (THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC) && !defined(__clang__) template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex atanh(const complex& z){ return detail::complex::catanh(z); } diff --git a/thrust/detail/complex/catrigf.h b/thrust/detail/complex/catrigf.h index 43ab09442..97d3bb480 100644 --- a/thrust/detail/complex/catrigf.h +++ b/thrust/detail/complex/catrigf.h @@ -60,7 +60,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex clog_for_large_values(complex z); /* @@ -77,7 +77,7 @@ __host__ __device__ inline * a few comments on the right of declarations remain. */ -__host__ __device__ +THRUST_HOST_DEVICE inline float f(float a, float b, float hypot_a_b) { @@ -98,7 +98,7 @@ f(float a, float b, float hypot_a_b) * If returning sqrt_A2my2 has potential to result in an underflow, it is * rescaled, and new_y is similarly rescaled. 
*/ -__host__ __device__ +THRUST_HOST_DEVICE inline void do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B, float *sqrt_A2my2, float *new_y) @@ -160,7 +160,7 @@ do_hard_work(float x, float y, float *rx, int *B_is_usable, float *B, } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex casinhf(complex z) { @@ -210,7 +210,7 @@ casinhf(complex z) return (complex(copysignf(rx, x), copysignf(ry, y))); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex casinf(complex z) { complex w = casinhf(complex(z.imag(), z.real())); @@ -218,7 +218,7 @@ complex casinf(complex z) return (complex(w.imag(), w.real())); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cacosf(complex z) { float x, y, ax, ay, rx, ry, B, sqrt_A2mx2, new_x; @@ -282,7 +282,7 @@ complex cacosf(complex z) return (complex(rx, ry)); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cacoshf(complex z) { complex w; @@ -307,7 +307,7 @@ complex cacoshf(complex z) /* * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex clog_for_large_values(complex z) { float x, y; @@ -349,7 +349,7 @@ complex clog_for_large_values(complex z) * Assumes y is non-negative. * Assumes fabsf(x) >= FLT_EPSILON. 
*/ -__host__ __device__ +THRUST_HOST_DEVICE inline float sum_squares(float x, float y) { const float SQRT_MIN = 1.084202172485504434007453e-19f; /* 0x1p-63; >= sqrt(FLT_MIN) */ @@ -360,7 +360,7 @@ inline float sum_squares(float x, float y) return (x * x + y * y); } -__host__ __device__ +THRUST_HOST_DEVICE inline float real_part_reciprocal(float x, float y) { float scale; @@ -388,7 +388,7 @@ inline float real_part_reciprocal(float x, float y) } #if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex catanhf(complex z) { float x, y, ax, ay, rx, ry; @@ -444,7 +444,7 @@ complex catanhf(complex z) return (complex(copysignf(rx, x), copysignf(ry, y))); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complexcatanf(complex z){ complex w = catanhf(complex(z.imag(), z.real())); return (complex(w.imag(), w.real())); @@ -457,41 +457,41 @@ complexcatanf(complex z){ template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex acos(const complex& z){ return detail::complex::cacosf(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex asin(const complex& z){ return detail::complex::casinf(z); } #if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex atan(const complex& z){ return detail::complex::catanf(z); } #endif template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex acosh(const complex& z){ return detail::complex::cacoshf(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex asinh(const complex& z){ return detail::complex::casinhf(z); } #if THRUST_CPP_DIALECT >= 2011 || THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex atanh(const complex& z){ return detail::complex::catanhf(z); } diff --git a/thrust/detail/complex/ccosh.h b/thrust/detail/complex/ccosh.h index 
722dfcd84..cde0f4cd9 100644 --- a/thrust/detail/complex/ccosh.h +++ b/thrust/detail/complex/ccosh.h @@ -66,7 +66,7 @@ namespace complex{ * These values and the return value were taken from n1124.pdf. */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline thrust::complex ccosh(const thrust::complex& z){ @@ -172,7 +172,7 @@ thrust::complex ccosh(const thrust::complex& z){ } -__host__ __device__ inline +THRUST_HOST_DEVICE inline thrust::complex ccos(const thrust::complex& z){ /* ccos(z) = ccosh(I * z) */ return (ccosh(thrust::complex(-z.imag(), z.real()))); @@ -183,7 +183,7 @@ thrust::complex ccos(const thrust::complex& z){ } // namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline complex cos(const complex& z){ const ValueType re = z.real(); const ValueType im = z.imag(); @@ -192,7 +192,7 @@ inline complex cos(const complex& z){ } template -__host__ __device__ +THRUST_HOST_DEVICE inline complex cosh(const complex& z){ const ValueType re = z.real(); const ValueType im = z.imag(); @@ -201,13 +201,13 @@ inline complex cosh(const complex& z){ } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline thrust::complex cos(const thrust::complex& z){ return detail::complex::ccos(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline thrust::complex cosh(const thrust::complex& z){ return detail::complex::ccosh(z); } diff --git a/thrust/detail/complex/ccoshf.h b/thrust/detail/complex/ccoshf.h index aa43f1208..90141cacf 100644 --- a/thrust/detail/complex/ccoshf.h +++ b/thrust/detail/complex/ccoshf.h @@ -59,7 +59,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ccoshf(const complex& z){ float x, y, h; uint32_t hx, hy, ix, iy; @@ -119,7 +119,7 @@ complex ccoshf(const complex& z){ return (complex((x * x) * (y - y), (x + x) * (y - y))); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ccosf(const complex& z){ return (ccoshf(complex(-z.imag(), z.real()))); } @@ -129,13 
+129,13 @@ complex ccosf(const complex& z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex cos(const complex& z){ return detail::complex::ccosf(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex cosh(const complex& z){ return detail::complex::ccoshf(z); } diff --git a/thrust/detail/complex/cexp.h b/thrust/detail/complex/cexp.h index 5e2a45b1c..ac07fd0e3 100644 --- a/thrust/detail/complex/cexp.h +++ b/thrust/detail/complex/cexp.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2013 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -65,7 +65,7 @@ namespace complex{ * Input: ln(DBL_MAX) <= x < ln(2 * DBL_MAX / DBL_MIN_DENORM) ~= 1454.91 * Output: 2**1023 <= y < 2**1024 */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline double frexp_exp(double x, int *expt){ const uint32_t k = 1799; /* constant for reduction */ const double kln2 = 1246.97177782734161156; /* k * ln2 */ @@ -87,7 +87,7 @@ __host__ __device__ inline } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ldexp_cexp(complex z, int expt){ double x, y, exp_x, scale1, scale2; int ex_expt, half_expt; @@ -110,7 +110,7 @@ complex ldexp_cexp(complex z, int expt){ sin(y) * exp_x * scale1 * scale2)); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cexp(const complex& z){ double x, y, exp_x; uint32_t hx, hy, lx, ly; @@ -171,13 +171,13 @@ complex cexp(const complex& z){ } // namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline complex exp(const complex& z){ return polar(exp(z.real()),z.imag()); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex exp(const complex& z){ return 
detail::complex::cexp(z); } diff --git a/thrust/detail/complex/cexpf.h b/thrust/detail/complex/cexpf.h index 2c0a5baaf..aa4db8e0c 100644 --- a/thrust/detail/complex/cexpf.h +++ b/thrust/detail/complex/cexpf.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2013 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ THRUST_NAMESPACE_BEGIN namespace detail{ namespace complex{ -__host__ __device__ inline +THRUST_HOST_DEVICE inline float frexp_expf(float x, int *expt){ const uint32_t k = 235; /* constant for reduction */ const float kln2 = 162.88958740F; /* k * ln2 */ @@ -75,7 +75,7 @@ float frexp_expf(float x, int *expt){ return (exp_x); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ldexp_cexpf(complex z, int expt) { @@ -96,7 +96,7 @@ ldexp_cexpf(complex z, int expt) sin(y) * exp_x * scale1 * scale2)); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex cexpf(const complex& z){ float x, y, exp_x; uint32_t hx, hy; @@ -156,7 +156,7 @@ complex cexpf(const complex& z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex exp(const complex& z){ return detail::complex::cexpf(z); } diff --git a/thrust/detail/complex/clog.h b/thrust/detail/complex/clog.h index d902682f1..0a6d6d551 100644 --- a/thrust/detail/complex/clog.h +++ b/thrust/detail/complex/clog.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2021 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ namespace complex{ using thrust::complex; /* round down to 18 = 54/3 bits */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline double trim(double x){ uint32_t hi; get_high_word(hi, x); @@ -67,7 +67,7 @@ double trim(double x){ return x; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex clog(const complex& z){ // Adapted from FreeBSDs msun @@ -191,19 +191,19 @@ complex clog(const complex& z){ } // namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline complex log(const complex& z){ return complex(log(thrust::abs(z)),thrust::arg(z)); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex log(const complex& z){ return detail::complex::clog(z); } template -__host__ __device__ +THRUST_HOST_DEVICE inline complex log10(const complex& z){ // Using the explicit literal prevents compile time warnings in // devices that don't support doubles diff --git a/thrust/detail/complex/clogf.h b/thrust/detail/complex/clogf.h index 812e1a3fc..a6ef1f410 100644 --- a/thrust/detail/complex/clogf.h +++ b/thrust/detail/complex/clogf.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2021 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -58,7 +58,7 @@ namespace complex{ using thrust::complex; /* round down to 8 = 24/3 bits */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline float trim(float x){ uint32_t hx; get_float_word(hx, x); @@ -69,7 +69,7 @@ float trim(float x){ } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex clogf(const complex& z){ // Adapted from FreeBSDs msun @@ -192,7 +192,7 @@ complex clogf(const complex& z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex log(const complex& z){ return detail::complex::clogf(z); } diff --git a/thrust/detail/complex/complex.inl b/thrust/detail/complex/complex.inl index bbcf26dc4..6e262fb2f 100644 --- a/thrust/detail/complex/complex.inl +++ b/thrust/detail/complex/complex.inl @@ -28,7 +28,7 @@ THRUST_NAMESPACE_BEGIN #if THRUST_CPP_DIALECT < 2011 template -__host__ __device__ +THRUST_HOST_DEVICE complex::complex() { real(T()); @@ -37,7 +37,7 @@ complex::complex() #endif template -__host__ __device__ +THRUST_HOST_DEVICE complex::complex(const T& re) #if THRUST_CPP_DIALECT >= 2011 // Initialize the storage in the member initializer list using C++ unicorn @@ -53,7 +53,7 @@ complex::complex(const T& re) template -__host__ __device__ +THRUST_HOST_DEVICE complex::complex(const T& re, const T& im) #if THRUST_CPP_DIALECT >= 2011 // Initialize the storage in the member initializer list using C++ unicorn @@ -69,7 +69,7 @@ complex::complex(const T& re, const T& im) #if THRUST_CPP_DIALECT < 2011 template -__host__ __device__ +THRUST_HOST_DEVICE complex::complex(const complex& z) { real(z.real()); @@ -79,7 +79,7 @@ complex::complex(const complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex::complex(const complex& z) #if THRUST_CPP_DIALECT >= 2011 // Initialize the storage in the member initializer list using C++ unicorn @@ -95,7 +95,7 @@ complex::complex(const complex& z) #endif template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE 
complex::complex(const std::complex& z) #if THRUST_CPP_DIALECT >= 2011 // Initialize the storage in the member initializer list using C++ unicorn @@ -111,7 +111,7 @@ complex::complex(const std::complex& z) template template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex::complex(const std::complex& z) #if THRUST_CPP_DIALECT >= 2011 // Initialize the storage in the member initializer list using C++ unicorn @@ -131,7 +131,7 @@ complex::complex(const std::complex& z) /* --- Assignment Operators --- */ template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator=(const T& re) { real(re); @@ -141,7 +141,7 @@ complex& complex::operator=(const T& re) #if THRUST_CPP_DIALECT < 2011 template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator=(const complex& z) { real(z.real()); @@ -152,7 +152,7 @@ complex& complex::operator=(const complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator=(const complex& z) { real(T(z.real())); @@ -161,7 +161,7 @@ complex& complex::operator=(const complex& z) } template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& complex::operator=(const std::complex& z) { real(THRUST_STD_COMPLEX_REAL(z)); @@ -171,7 +171,7 @@ complex& complex::operator=(const std::complex& z) template template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& complex::operator=(const std::complex& z) { real(T(THRUST_STD_COMPLEX_REAL(z))); @@ -185,7 +185,7 @@ complex& complex::operator=(const std::complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator+=(const complex& z) { *this = *this + z; @@ -194,7 +194,7 @@ complex& complex::operator+=(const complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator-=(const complex& z) { *this = *this - z; @@ -203,7 +203,7 @@ complex& complex::operator-=(const complex& z) 
template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator*=(const complex& z) { *this = *this * z; @@ -212,7 +212,7 @@ complex& complex::operator*=(const complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator/=(const complex& z) { *this = *this / z; @@ -221,7 +221,7 @@ complex& complex::operator/=(const complex& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator+=(const U& z) { *this = *this + z; @@ -230,7 +230,7 @@ complex& complex::operator+=(const U& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator-=(const U& z) { *this = *this - z; @@ -239,7 +239,7 @@ complex& complex::operator-=(const U& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator*=(const U& z) { *this = *this * z; @@ -248,7 +248,7 @@ complex& complex::operator*=(const U& z) template template -__host__ __device__ +THRUST_HOST_DEVICE complex& complex::operator/=(const U& z) { *this = *this / z; @@ -260,70 +260,70 @@ complex& complex::operator/=(const U& z) /* --- Equality Operators --- */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const complex& x, const complex& y) { return x.real() == y.real() && x.imag() == y.imag(); } template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const complex& x, const std::complex& y) { return x.real() == THRUST_STD_COMPLEX_REAL(y) && x.imag() == THRUST_STD_COMPLEX_IMAG(y); } template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const std::complex& x, const complex& y) { return THRUST_STD_COMPLEX_REAL(x) == y.real() && THRUST_STD_COMPLEX_IMAG(x) == y.imag(); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const T0& x, const complex& y) { return x == y.real() && y.imag() == T1(); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const 
complex& x, const T1& y) { return x.real() == y && x.imag() == T1(); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const complex& x, const complex& y) { return !(x == y); } template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const complex& x, const std::complex& y) { return !(x == y); } template -__host__ THRUST_STD_COMPLEX_DEVICE +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const std::complex& x, const complex& y) { return !(x == y); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const T0& x, const complex& y) { return !(x == y); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const complex& x, const T1& y) { return !(x == y); diff --git a/thrust/detail/complex/cpow.h b/thrust/detail/complex/cpow.h index 73ea5762c..f7f5096ed 100644 --- a/thrust/detail/complex/cpow.h +++ b/thrust/detail/complex/cpow.h @@ -1,7 +1,7 @@ /* * Copyright 2008-2013 NVIDIA Corporation * Copyright 2013 Filipe RNC Maia - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const complex& x, const complex& y) { @@ -36,7 +36,7 @@ pow(const complex& x, const complex& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const complex& x, const T1& y) { @@ -45,7 +45,7 @@ pow(const complex& x, const T1& y) } template -__host__ __device__ +THRUST_HOST_DEVICE complex::type> pow(const T0& x, const complex& y) { diff --git a/thrust/detail/complex/cproj.h b/thrust/detail/complex/cproj.h index 7537c99fd..0a1a3f64d 100644 --- a/thrust/detail/complex/cproj.h +++ b/thrust/detail/complex/cproj.h @@ -26,7 +26,7 @@ THRUST_NAMESPACE_BEGIN namespace detail{ namespace complex{ -__host__ __device__ +THRUST_HOST_DEVICE inline complex cprojf(const complex& z){ if(!isinf(z.real()) && !isinf(z.imag())){ return z; @@ -36,7 +36,7 @@ inline complex cprojf(const complex& z){ } } -__host__ __device__ +THRUST_HOST_DEVICE inline complex cproj(const complex& z){ if(!isinf(z.real()) && !isinf(z.imag())){ return z; @@ -51,20 +51,20 @@ inline complex cproj(const complex& z){ } template -__host__ __device__ +THRUST_HOST_DEVICE inline thrust::complex proj(const thrust::complex& z){ return detail::complex::cproj(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline thrust::complex proj(const thrust::complex& z){ return detail::complex::cproj(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline thrust::complex proj(const thrust::complex& z){ return detail::complex::cprojf(z); } diff --git a/thrust/detail/complex/csinh.h b/thrust/detail/complex/csinh.h index b5a22af01..83edc3a68 100644 --- a/thrust/detail/complex/csinh.h +++ b/thrust/detail/complex/csinh.h @@ -59,7 +59,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csinh(const complex& z){ double x, y, h; uint32_t hx, hy, ix, iy, lx, ly; @@ -162,7 +162,7 @@ complex csinh(const complex& z){ return (complex((x * 
x) * (y - y), (x + x) * (y - y))); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csin(complex z){ /* csin(z) = -I * csinh(I * z) */ z = csinh(complex(-z.imag(), z.real())); @@ -174,7 +174,7 @@ complex csin(complex z){ } // namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline complex sin(const complex& z){ const ValueType re = z.real(); const ValueType im = z.imag(); @@ -184,7 +184,7 @@ inline complex sin(const complex& z){ template -__host__ __device__ +THRUST_HOST_DEVICE inline complex sinh(const complex& z){ const ValueType re = z.real(); const ValueType im = z.imag(); @@ -193,13 +193,13 @@ inline complex sinh(const complex& z){ } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sin(const complex& z){ return detail::complex::csin(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sinh(const complex& z){ return detail::complex::csinh(z); } diff --git a/thrust/detail/complex/csinhf.h b/thrust/detail/complex/csinhf.h index d271081c6..4126a89c8 100644 --- a/thrust/detail/complex/csinhf.h +++ b/thrust/detail/complex/csinhf.h @@ -59,7 +59,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csinhf(const complex& z){ float x, y, h; @@ -119,7 +119,7 @@ complex csinhf(const complex& z){ return (complex((x * x) * (y - y), (x + x) * (y - y))); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csinf(complex z){ z = csinhf(complex(-z.imag(), z.real())); return (complex(z.imag(), -z.real())); @@ -130,13 +130,13 @@ complex csinf(complex z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sin(const complex& z){ return detail::complex::csinf(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sinh(const complex& z){ return detail::complex::csinhf(z); } diff --git a/thrust/detail/complex/csqrt.h b/thrust/detail/complex/csqrt.h index eb4da5289..0560c711e 100644 --- 
a/thrust/detail/complex/csqrt.h +++ b/thrust/detail/complex/csqrt.h @@ -61,7 +61,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csqrt(const complex& z){ complex result; double a, b; @@ -140,13 +140,13 @@ complex csqrt(const complex& z){ } // namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE inline complex sqrt(const complex& z){ return thrust::polar(std::sqrt(thrust::abs(z)),thrust::arg(z)/ValueType(2)); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sqrt(const complex& z){ return detail::complex::csqrt(z); } diff --git a/thrust/detail/complex/csqrtf.h b/thrust/detail/complex/csqrtf.h index dba489a33..62ce91365 100644 --- a/thrust/detail/complex/csqrtf.h +++ b/thrust/detail/complex/csqrtf.h @@ -61,7 +61,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex csqrtf(const complex& z){ float a = z.real(), b = z.imag(); float t; @@ -141,7 +141,7 @@ complex csqrtf(const complex& z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex sqrt(const complex& z){ return detail::complex::csqrtf(z); } diff --git a/thrust/detail/complex/ctanh.h b/thrust/detail/complex/ctanh.h index 3275c0343..d8dc8dd80 100644 --- a/thrust/detail/complex/ctanh.h +++ b/thrust/detail/complex/ctanh.h @@ -99,7 +99,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ctanh(const complex& z){ double x, y; double t, beta, s, rho, denom; @@ -161,7 +161,7 @@ complex ctanh(const complex& z){ return (complex((beta * rho * s) / denom, t / denom)); } -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ctan(complex z){ /* ctan(z) = -I * ctanh(I * z) */ z = ctanh(complex(-z.imag(), z.real())); @@ -174,13 +174,13 @@ complex ctan(complex z){ template -__host__ __device__ +THRUST_HOST_DEVICE inline complex tan(const complex& z){ return sin(z)/cos(z); } 
template -__host__ __device__ +THRUST_HOST_DEVICE inline complex tanh(const complex& z){ // This implementation seems better than the simple sin/cos return (thrust::exp(ValueType(2)*z)-ValueType(1))/ @@ -188,13 +188,13 @@ inline complex tanh(const complex& z){ } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex tan(const complex& z){ return detail::complex::ctan(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex tanh(const complex& z){ return detail::complex::ctanh(z); } diff --git a/thrust/detail/complex/ctanhf.h b/thrust/detail/complex/ctanhf.h index 221b5ce47..0ea22e84d 100644 --- a/thrust/detail/complex/ctanhf.h +++ b/thrust/detail/complex/ctanhf.h @@ -64,7 +64,7 @@ namespace complex{ using thrust::complex; -__host__ __device__ inline +THRUST_HOST_DEVICE inline complex ctanhf(const complex& z){ float x, y; float t, beta, s, rho, denom; @@ -101,7 +101,7 @@ complex ctanhf(const complex& z){ return (complex((beta * rho * s) / denom, t / denom)); } - __host__ __device__ inline + THRUST_HOST_DEVICE inline complex ctanf(complex z){ z = ctanhf(complex(-z.imag(), z.real())); return (complex(z.imag(), -z.real())); @@ -112,13 +112,13 @@ complex ctanhf(const complex& z){ } // namespace detail template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex tan(const complex& z){ return detail::complex::ctanf(z); } template <> -__host__ __device__ +THRUST_HOST_DEVICE inline complex tanh(const complex& z){ return detail::complex::ctanhf(z); } diff --git a/thrust/detail/complex/math_private.h b/thrust/detail/complex/math_private.h index 3a40c8e72..4d28e1b35 100644 --- a/thrust/detail/complex/math_private.h +++ b/thrust/detail/complex/math_private.h @@ -47,21 +47,21 @@ typedef union uint32_t word; } ieee_float_shape_type; -__host__ __device__ +THRUST_HOST_DEVICE inline void get_float_word(uint32_t & i, float d){ ieee_float_shape_type gf_u; gf_u.value = (d); (i) = gf_u.word; } -__host__ __device__ +THRUST_HOST_DEVICE inline void 
get_float_word(int32_t & i, float d){ ieee_float_shape_type gf_u; gf_u.value = (d); (i) = gf_u.word; } -__host__ __device__ +THRUST_HOST_DEVICE inline void set_float_word(float & d, uint32_t i){ ieee_float_shape_type sf_u; sf_u.word = (i); @@ -83,7 +83,7 @@ typedef union } xparts; } ieee_double_shape_type; -__host__ __device__ inline +THRUST_HOST_DEVICE inline void get_high_word(uint32_t & i,double d){ ieee_double_shape_type gh_u; gh_u.value = (d); @@ -91,7 +91,7 @@ void get_high_word(uint32_t & i,double d){ } /* Set the more significant 32 bits of a double from an int. */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline void set_high_word(double & d, uint32_t v){ ieee_double_shape_type sh_u; sh_u.value = (d); @@ -100,7 +100,7 @@ void set_high_word(double & d, uint32_t v){ } -__host__ __device__ inline +THRUST_HOST_DEVICE inline void insert_words(double & d, uint32_t ix0, uint32_t ix1){ ieee_double_shape_type iw_u; iw_u.parts.msw = (ix0); @@ -109,7 +109,7 @@ void insert_words(double & d, uint32_t ix0, uint32_t ix1){ } /* Get two 32 bit ints from a double. */ -__host__ __device__ inline +THRUST_HOST_DEVICE inline void extract_words(uint32_t & ix0,uint32_t & ix1, double d){ ieee_double_shape_type ew_u; ew_u.value = (d); @@ -118,7 +118,7 @@ void extract_words(uint32_t & ix0,uint32_t & ix1, double d){ } /* Get two 32 bit ints from a double. 
*/ -__host__ __device__ inline +THRUST_HOST_DEVICE inline void extract_words(int32_t & ix0,int32_t & ix1, double d){ ieee_double_shape_type ew_u; ew_u.value = (d); diff --git a/thrust/detail/config/config.h b/thrust/detail/config/config.h index 3ae3a3ba7..d56878e9e 100644 --- a/thrust/detail/config/config.h +++ b/thrust/detail/config/config.h @@ -31,9 +31,7 @@ // because other config headers depend on it #include #include -#include +#include #include -#include -#include #include #include diff --git a/thrust/detail/config/cpp_compatibility.h b/thrust/detail/config/cpp_compatibility.h index f399e72f0..84c8fdd45 100644 --- a/thrust/detail/config/cpp_compatibility.h +++ b/thrust/detail/config/cpp_compatibility.h @@ -56,10 +56,10 @@ //# define THRUST_INLINE_CONSTANT inline constexpr //# define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT inline constexpr # if THRUST_CPP_DIALECT >= 2011 -# define THRUST_INLINE_CONSTANT static const __device__ +# define THRUST_INLINE_CONSTANT static const _CCCL_DEVICE # define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT static constexpr # else -# define THRUST_INLINE_CONSTANT static const __device__ +# define THRUST_INLINE_CONSTANT static const _CCCL_DEVICE # define THRUST_INLINE_INTEGRAL_MEMBER_CONSTANT static const # endif #else diff --git a/thrust/detail/config/exec_check_disable.h b/thrust/detail/config/exec_check_disable.h deleted file mode 100644 index 7682f97b8..000000000 --- a/thrust/detail/config/exec_check_disable.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2013 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file exec_check_disable.h - * \brief Defines __thrust_exec_check_disable__ - */ - - #pragma once - - #include - -// #pragma nv_exec_check_disable is only recognized by NVCC. Having a macro -// expand to a #pragma (rather than _Pragma) only works with NVCC's compilation -// model, not with other compilers. -#if defined(__CUDACC__) && !defined(_NVHPC_CUDA) && \ - !(defined(__CUDA__) && defined(__clang__)) - - #if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC - #define __thrust_exec_check_disable__ __pragma("nv_exec_check_disable") - #else // MSVC - #define __thrust_exec_check_disable__ _Pragma("nv_exec_check_disable") - #endif // MSVC - - #else - - #define __thrust_exec_check_disable__ - - #endif diff --git a/thrust/detail/config/execution_space.h b/thrust/detail/config/execution_space.h new file mode 100644 index 000000000..d691907fe --- /dev/null +++ b/thrust/detail/config/execution_space.h @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Modifications Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +//===----------------------------------------------------------------------===// + +#ifndef THRUST_DETAIL_CONFIG_EXECUTION_SPACE_H +#define THRUST_DETAIL_CONFIG_EXECUTION_SPACE_H + +#include + +#if (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC) && (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_HIP) +#define THRUST_HOST +#define THRUST_DEVICE +#define THRUST_HOST_DEVICE +#define THRUST_FORCEINLINE +#else +#define THRUST_HOST __host__ +#define THRUST_DEVICE __device__ +#define THRUST_HOST_DEVICE __host__ __device__ +#define THRUST_FORCEINLINE __forceinline__ +#endif + +#if !defined(THRUST_EXEC_CHECK_DISABLE) +# if defined(_CCCL_CUDA_COMPILER_NVCC) +# if defined(_CCCL_COMPILER_MSVC) +# define THRUST_EXEC_CHECK_DISABLE __pragma("nv_exec_check_disable") +# else // ^^^ _CCCL_COMPILER_MSVC ^^^ / vvv !_CCCL_COMPILER_MSVC vvv +# define THRUST_EXEC_CHECK_DISABLE _Pragma("nv_exec_check_disable") +# endif // !_CCCL_COMPILER_MSVC +# else +# define THRUST_EXEC_CHECK_DISABLE +# endif // _CCCL_CUDA_COMPILER_NVCC +#endif // !THRUST_EXEC_CHECK_DISABLE + +#endif // THRUST_DETAIL_CONFIG_EXECUTION_SPACE_H \ No newline at end of file diff --git a/thrust/detail/config/forceinline.h b/thrust/detail/config/forceinline.h deleted file mode 100644 index 5ce645b36..000000000 --- a/thrust/detail/config/forceinline.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file forceinline.h - * \brief Defines __thrust_forceinline__ - */ - -#pragma once - -// Internal config header that is only included through thrust/detail/config/config.h - - -#if defined(__CUDACC__) || defined(_NVHPC_CUDA) - -#define __thrust_forceinline__ __forceinline__ - -#elif THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP - -#define __thrust_forceinline__ __forceinline__ - -#else - -// TODO add - -#define __thrust_forceinline__ - -#endif diff --git a/thrust/detail/config/host_device.h b/thrust/detail/config/host_device.h deleted file mode 100644 index de33aefb6..000000000 --- a/thrust/detail/config/host_device.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! 
\file host_device.h - * \brief Defines __host__ and __device__ - */ - -#pragma once - -#include - -// since nvcc defines __host__ and __device__ for us, -// and only nvcc knows what to do with __host__ and __device__, -// define them to be the empty string for other compilers - -#if (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC) && (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_HIP) - -// since __host__ & __device__ might have already be defined, only -// #define them if not defined already -// XXX this will break if the client does #include later - -#ifndef __host__ -#define __host__ -#endif // __host__ - -#ifndef __device__ -#define __device__ -#endif // __device__ - -#endif - diff --git a/thrust/detail/contiguous_storage.h b/thrust/detail/contiguous_storage.h index 2f76d03fd..33bf36d19 100644 --- a/thrust/detail/contiguous_storage.h +++ b/thrust/detail/contiguous_storage.h @@ -49,128 +49,128 @@ template typedef thrust::detail::normal_iterator iterator; typedef thrust::detail::normal_iterator const_iterator; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE explicit contiguous_storage(const allocator_type &alloc = allocator_type()); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE explicit contiguous_storage(size_type n, const allocator_type &alloc = allocator_type()); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE explicit contiguous_storage(copy_allocator_t, const contiguous_storage &other); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE explicit contiguous_storage(copy_allocator_t, const contiguous_storage &other, size_type n); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE ~contiguous_storage(); - __host__ __device__ + THRUST_HOST_DEVICE size_type size() const; - __host__ 
__device__ + THRUST_HOST_DEVICE size_type max_size() const; - __host__ __device__ + THRUST_HOST_DEVICE pointer data(); - __host__ __device__ + THRUST_HOST_DEVICE const_pointer data() const; - __host__ __device__ + THRUST_HOST_DEVICE iterator begin(); - __host__ __device__ + THRUST_HOST_DEVICE const_iterator begin() const; - __host__ __device__ + THRUST_HOST_DEVICE iterator end(); - __host__ __device__ + THRUST_HOST_DEVICE const_iterator end() const; - __host__ __device__ + THRUST_HOST_DEVICE reference operator[](size_type n); - __host__ __device__ + THRUST_HOST_DEVICE const_reference operator[](size_type n) const; - __host__ __device__ + THRUST_HOST_DEVICE allocator_type get_allocator() const; // note that allocate does *not* automatically call deallocate - __host__ __device__ + THRUST_HOST_DEVICE void allocate(size_type n); - __host__ __device__ + THRUST_HOST_DEVICE void deallocate(); - __host__ __device__ + THRUST_HOST_DEVICE void swap(contiguous_storage &x); - __host__ __device__ + THRUST_HOST_DEVICE void default_construct_n(iterator first, size_type n); - __host__ __device__ + THRUST_HOST_DEVICE void uninitialized_fill_n(iterator first, size_type n, const value_type &value); template - __host__ __device__ + THRUST_HOST_DEVICE iterator uninitialized_copy(InputIterator first, InputIterator last, iterator result); template - __host__ __device__ + THRUST_HOST_DEVICE iterator uninitialized_copy(thrust::execution_policy &from_system, InputIterator first, InputIterator last, iterator result); template - __host__ __device__ + THRUST_HOST_DEVICE iterator uninitialized_copy_n(InputIterator first, Size n, iterator result); template - __host__ __device__ + THRUST_HOST_DEVICE iterator uninitialized_copy_n(thrust::execution_policy &from_system, InputIterator first, Size n, iterator result); - __host__ __device__ + THRUST_HOST_DEVICE void destroy(iterator first, iterator last); - __host__ __device__ + THRUST_HOST_DEVICE void deallocate_on_allocator_mismatch(const 
contiguous_storage &other); - __host__ __device__ + THRUST_HOST_DEVICE void destroy_on_allocator_mismatch(const contiguous_storage &other, iterator first, iterator last); - __host__ __device__ + THRUST_HOST_DEVICE void set_allocator(const allocator_type &alloc); - __host__ __device__ + THRUST_HOST_DEVICE bool is_allocator_not_equal(const allocator_type &alloc) const; - __host__ __device__ + THRUST_HOST_DEVICE bool is_allocator_not_equal(const contiguous_storage &other) const; - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator(const contiguous_storage &other); #if THRUST_CPP_DIALECT >= 2011 - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator(contiguous_storage &other); // allow move assignment for a sane implementation of allocator propagation // on move assignment - __host__ __device__ + THRUST_HOST_DEVICE contiguous_storage &operator=(contiguous_storage &&other); -#endif +#endif // THRUST_CPP_DIALECT >= 2011 private: // XXX we could inherit from this to take advantage of empty base class optimization @@ -183,51 +183,51 @@ template // disallow assignment contiguous_storage &operator=(const contiguous_storage &x); - __host__ __device__ + THRUST_HOST_DEVICE void swap_allocators(true_type, const allocator_type &); - __host__ __device__ + THRUST_HOST_DEVICE void swap_allocators(false_type, allocator_type &); - __host__ __device__ + THRUST_HOST_DEVICE bool is_allocator_not_equal_dispatch(true_type, const allocator_type &) const; - __host__ __device__ + THRUST_HOST_DEVICE bool is_allocator_not_equal_dispatch(false_type, const allocator_type &) const; - __host__ __device__ + THRUST_HOST_DEVICE void deallocate_on_allocator_mismatch_dispatch(true_type, const contiguous_storage &other); - __host__ __device__ + THRUST_HOST_DEVICE void deallocate_on_allocator_mismatch_dispatch(false_type, const contiguous_storage &other); - __host__ __device__ + THRUST_HOST_DEVICE void destroy_on_allocator_mismatch_dispatch(true_type, const contiguous_storage 
&other, iterator first, iterator last); - __host__ __device__ + THRUST_HOST_DEVICE void destroy_on_allocator_mismatch_dispatch(false_type, const contiguous_storage &other, iterator first, iterator last); - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator_dispatch(true_type, const contiguous_storage &other); - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator_dispatch(false_type, const contiguous_storage &other); #if THRUST_CPP_DIALECT >= 2011 - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator_dispatch(true_type, contiguous_storage &other); - __host__ __device__ + THRUST_HOST_DEVICE void propagate_allocator_dispatch(false_type, contiguous_storage &other); -#endif +#endif // THRUST_CPP_DIALECT >= 2011 }; // end contiguous_storage } // end detail template -__host__ __device__ +THRUST_HOST_DEVICE void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs); THRUST_NAMESPACE_END diff --git a/thrust/detail/contiguous_storage.inl b/thrust/detail/contiguous_storage.inl index 2b76a4a9c..dce33d734 100644 --- a/thrust/detail/contiguous_storage.inl +++ b/thrust/detail/contiguous_storage.inl @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2023-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -45,9 +45,9 @@ public: } }; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage ::contiguous_storage(const Alloc &alloc) :m_allocator(alloc), @@ -57,9 +57,9 @@ __host__ __device__ ; } // end contiguous_storage::contiguous_storage() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage ::contiguous_storage(size_type n, const Alloc &alloc) :m_allocator(alloc), @@ -70,7 +70,7 @@ __host__ __device__ } // end contiguous_storage::contiguous_storage() template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage ::contiguous_storage(copy_allocator_t, const contiguous_storage &other) @@ -81,7 +81,7 @@ __host__ __device__ } // end contiguous_storage::contiguous_storage() template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage ::contiguous_storage(copy_allocator_t, const contiguous_storage &other, size_type n) @@ -92,9 +92,9 @@ __host__ __device__ allocate(n); } // end contiguous_storage::contiguous_storage() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage ::~contiguous_storage() { @@ -102,7 +102,7 @@ __host__ __device__ } // end contiguous_storage::~contiguous_storage() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::size_type contiguous_storage ::size() const @@ -111,7 +111,7 @@ __host__ __device__ } // end contiguous_storage::size() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::size_type contiguous_storage ::max_size() const @@ -120,7 +120,7 @@ __host__ __device__ } // end contiguous_storage::max_size() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::begin() @@ -129,7 +129,7 @@ __host__ __device__ } // end contiguous_storage::begin() template -__host__ __device__ +THRUST_HOST_DEVICE typename 
contiguous_storage::const_iterator contiguous_storage ::begin() const @@ -138,7 +138,7 @@ __host__ __device__ } // end contiguous_storage::begin() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::end() @@ -147,7 +147,7 @@ __host__ __device__ } // end contiguous_storage::end() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::const_iterator contiguous_storage ::end() const @@ -156,7 +156,7 @@ __host__ __device__ } // end contiguous_storage::end() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::pointer contiguous_storage ::data() @@ -165,7 +165,7 @@ __host__ __device__ } // end contiguous_storage::data() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::const_pointer contiguous_storage ::data() const @@ -174,7 +174,7 @@ __host__ __device__ } // end contiguous_storage::data() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::reference contiguous_storage ::operator[](size_type n) @@ -183,7 +183,7 @@ __host__ __device__ } // end contiguous_storage::operator[]() template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::const_reference contiguous_storage ::operator[](size_type n) const @@ -191,9 +191,9 @@ __host__ __device__ return m_begin[n]; } // end contiguous_storage::operator[]() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename contiguous_storage::allocator_type contiguous_storage ::get_allocator() const @@ -202,7 +202,7 @@ __host__ __device__ } // end contiguous_storage::get_allocator() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::allocate(size_type n) { @@ -219,7 +219,7 @@ __host__ __device__ } // end contiguous_storage::allocate() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::deallocate() { @@ -232,7 +232,7 @@ __host__ __device__ } // end 
contiguous_storage::deallocate() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::swap(contiguous_storage &x) { @@ -250,7 +250,7 @@ __host__ __device__ } // end contiguous_storage::swap() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::default_construct_n(iterator first, size_type n) { @@ -258,7 +258,7 @@ __host__ __device__ } // end contiguous_storage::default_construct_n() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::uninitialized_fill_n(iterator first, size_type n, const value_type &x) { @@ -267,7 +267,7 @@ __host__ __device__ template template - __host__ __device__ + THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::uninitialized_copy(thrust::execution_policy &from_system, InputIterator first, InputIterator last, iterator result) @@ -277,7 +277,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::uninitialized_copy(InputIterator first, InputIterator last, iterator result) @@ -290,7 +290,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::uninitialized_copy_n(thrust::execution_policy &from_system, InputIterator first, Size n, iterator result) @@ -300,7 +300,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename contiguous_storage::iterator contiguous_storage ::uninitialized_copy_n(InputIterator first, Size n, iterator result) @@ -312,7 +312,7 @@ template } // end contiguous_storage::uninitialized_copy_n() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::destroy(iterator first, iterator last) { @@ -320,7 +320,7 @@ __host__ __device__ } // end contiguous_storage::destroy() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::deallocate_on_allocator_mismatch(const contiguous_storage &other) { @@ -333,7 +333,7 @@ 
__host__ __device__ } // end contiguous_storage::deallocate_on_allocator_mismatch template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::destroy_on_allocator_mismatch(const contiguous_storage &other, iterator first, iterator last) @@ -346,9 +346,9 @@ __host__ __device__ destroy_on_allocator_mismatch_dispatch(c, other, first, last); } // end contiguous_storage::destroy_on_allocator_mismatch -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::set_allocator(const Alloc &alloc) { @@ -356,7 +356,7 @@ __host__ __device__ } // end contiguous_storage::set_allocator() template -__host__ __device__ +THRUST_HOST_DEVICE bool contiguous_storage ::is_allocator_not_equal(const Alloc &alloc) const { @@ -369,7 +369,7 @@ __host__ __device__ } // end contiguous_storage::is_allocator_not_equal() template -__host__ __device__ +THRUST_HOST_DEVICE bool contiguous_storage ::is_allocator_not_equal(const contiguous_storage &other) const { @@ -377,7 +377,7 @@ __host__ __device__ } // end contiguous_storage::is_allocator_not_equal() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator(const contiguous_storage &other) { @@ -391,7 +391,7 @@ __host__ __device__ #if THRUST_CPP_DIALECT >= 2011 template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator(contiguous_storage &other) { @@ -404,7 +404,7 @@ __host__ __device__ } // end contiguous_storage::propagate_allocator() template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_storage &contiguous_storage ::operator=(contiguous_storage &&other) { @@ -424,14 +424,14 @@ __host__ __device__ #endif template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::swap_allocators(true_type, const Alloc &) { } // end contiguous_storage::swap_allocators() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::swap_allocators(false_type, Alloc 
&other) { @@ -448,25 +448,25 @@ __host__ __device__ } // end contiguous_storage::swap_allocators() template -__host__ __device__ +THRUST_HOST_DEVICE bool contiguous_storage ::is_allocator_not_equal_dispatch(true_type /*is_always_equal*/, const Alloc &) const { return false; } // end contiguous_storage::is_allocator_not_equal_dispatch() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool contiguous_storage ::is_allocator_not_equal_dispatch(false_type /*!is_always_equal*/, const Alloc& other) const { return m_allocator != other; } // end contiguous_storage::is_allocator_not_equal_dispatch() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::deallocate_on_allocator_mismatch_dispatch(true_type, const contiguous_storage &other) { @@ -477,15 +477,15 @@ __host__ __device__ } // end contiguous_storage::deallocate_on_allocator_mismatch() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::deallocate_on_allocator_mismatch_dispatch(false_type, const contiguous_storage &) { } // end contiguous_storage::deallocate_on_allocator_mismatch() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::destroy_on_allocator_mismatch_dispatch(true_type, const contiguous_storage &other, iterator first, iterator last) @@ -497,16 +497,16 @@ __host__ __device__ } // end contiguous_storage::destroy_on_allocator_mismatch() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::destroy_on_allocator_mismatch_dispatch(false_type, const contiguous_storage &, iterator, iterator) { } // end contiguous_storage::destroy_on_allocator_mismatch() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator_dispatch(true_type, const contiguous_storage 
&other) { @@ -514,16 +514,16 @@ __host__ __device__ } // end contiguous_storage::propagate_allocator() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator_dispatch(false_type, const contiguous_storage &) { } // end contiguous_storage::propagate_allocator() #if THRUST_CPP_DIALECT >= 2011 -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator_dispatch(true_type, contiguous_storage &other) { @@ -531,17 +531,17 @@ __host__ __device__ } // end contiguous_storage::propagate_allocator() template -__host__ __device__ +THRUST_HOST_DEVICE void contiguous_storage ::propagate_allocator_dispatch(false_type, contiguous_storage &) { } // end contiguous_storage::propagate_allocator() -#endif +#endif // THRUST_CPP_DIALECT >= 2011 } // end detail template -__host__ __device__ +THRUST_HOST_DEVICE void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs) { lhs.swap(rhs); diff --git a/thrust/detail/copy.h b/thrust/detail/copy.h index d6c5bc805..a8bc48311 100644 --- a/thrust/detail/copy.h +++ b/thrust/detail/copy.h @@ -24,7 +24,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(const thrust::detail::execution_policy_base &system, InputIterator first, InputIterator last, @@ -34,7 +34,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(const thrust::detail::execution_policy_base &system, InputIterator first, Size n, @@ -62,7 +62,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator two_system_copy(const thrust::execution_policy &from_system, const thrust::execution_policy &two_system, InputIterator first, @@ -75,7 +75,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator two_system_copy_n(const thrust::execution_policy &from_system, const thrust::execution_policy &two_system, InputIterator first, diff --git 
a/thrust/detail/copy.inl b/thrust/detail/copy.inl index 4d62798c7..e36931bf3 100644 --- a/thrust/detail/copy.inl +++ b/thrust/detail/copy.inl @@ -24,9 +24,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -37,9 +37,9 @@ __host__ __device__ } // end copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, @@ -54,12 +54,12 @@ namespace detail { -__thrust_exec_check_disable__ // because we might call e.g. std::ostream_iterator's constructor +THRUST_EXEC_CHECK_DISABLE // because we might call e.g. std::ostream_iterator's constructor template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator two_system_copy(const thrust::execution_policy &system1, const thrust::execution_policy &system2, InputIterator first, @@ -72,13 +72,13 @@ __host__ __device__ } // end two_system_copy() -__thrust_exec_check_disable__ // because we might call e.g. std::ostream_iterator's constructor +THRUST_EXEC_CHECK_DISABLE // because we might call e.g. 
std::ostream_iterator's constructor template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator two_system_copy_n(const thrust::execution_policy &system1, const thrust::execution_policy &system2, InputIterator first, diff --git a/thrust/detail/copy_if.h b/thrust/detail/copy_if.h index 32eb5e083..ec663c7df 100644 --- a/thrust/detail/copy_if.h +++ b/thrust/detail/copy_if.h @@ -25,7 +25,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -38,7 +38,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/detail/copy_if.inl b/thrust/detail/copy_if.inl index 952541c51..e62a04b13 100644 --- a/thrust/detail/copy_if.inl +++ b/thrust/detail/copy_if.inl @@ -25,12 +25,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -42,13 +42,13 @@ __host__ __device__ } // end copy_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/detail/count.h b/thrust/detail/count.h index 7c48bc546..27bf0628b 100644 --- a/thrust/detail/count.h +++ b/thrust/detail/count.h @@ -24,7 +24,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -34,7 +34,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count_if(const 
thrust::detail::execution_policy_base &exec, InputIterator first, diff --git a/thrust/detail/count.inl b/thrust/detail/count.inl index 5d1f628a9..39797e8af 100644 --- a/thrust/detail/count.inl +++ b/thrust/detail/count.inl @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value) { @@ -36,9 +36,9 @@ __host__ __device__ } // end count() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) { diff --git a/thrust/detail/dependencies_aware_execution_policy.h b/thrust/detail/dependencies_aware_execution_policy.h index a7567a3fa..657723478 100644 --- a/thrust/detail/dependencies_aware_execution_policy.h +++ b/thrust/detail/dependencies_aware_execution_policy.h @@ -34,7 +34,7 @@ template class ExecutionPolicyCRTPBase> struct dependencies_aware_execution_policy { template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... @@ -45,7 +45,7 @@ struct dependencies_aware_execution_policy } template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... @@ -55,7 +55,7 @@ struct dependencies_aware_execution_policy return { capture_as_dependency(dependencies) }; } template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... @@ -66,7 +66,7 @@ struct dependencies_aware_execution_policy } template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... 
@@ -77,7 +77,7 @@ struct dependencies_aware_execution_policy } template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... @@ -87,7 +87,7 @@ struct dependencies_aware_execution_policy return { capture_as_dependency(dependencies) }; } template - __host__ + THRUST_HOST thrust::detail::execute_with_dependencies< ExecutionPolicyCRTPBase, Dependencies... diff --git a/thrust/detail/device_ptr.inl b/thrust/detail/device_ptr.inl index 361c61f33..400d34bde 100644 --- a/thrust/detail/device_ptr.inl +++ b/thrust/detail/device_ptr.inl @@ -25,14 +25,14 @@ THRUST_NAMESPACE_BEGIN template - __host__ __device__ + THRUST_HOST_DEVICE device_ptr device_pointer_cast(T *ptr) { return device_ptr(ptr); } // end device_pointer_cast() template - __host__ __device__ + THRUST_HOST_DEVICE device_ptr device_pointer_cast(const device_ptr &ptr) { return ptr; diff --git a/thrust/detail/distance.inl b/thrust/detail/distance.inl index 6702c2b6f..ebaf0b557 100644 --- a/thrust/detail/distance.inl +++ b/thrust/detail/distance.inl @@ -23,9 +23,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last) { diff --git a/thrust/detail/equal.inl b/thrust/detail/equal.inl index e21ddfa5a..5c7f29d7d 100644 --- a/thrust/detail/equal.inl +++ b/thrust/detail/equal.inl @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) { using thrust::system::detail::generic::equal; @@ -35,9 +35,9 @@ bool equal(const thrust::detail::execution_policy_base &system, InputIte } // end equal() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template 
-__host__ __device__ +THRUST_HOST_DEVICE bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) { using thrust::system::detail::generic::equal; diff --git a/thrust/detail/event_error.h b/thrust/detail/event_error.h index cf6116469..c27f1fb8a 100644 --- a/thrust/detail/event_error.h +++ b/thrust/detail/event_error.h @@ -125,23 +125,23 @@ inline error_condition make_error_condition(event_errc e) struct event_error : std::logic_error { - __host__ + THRUST_HOST explicit event_error(error_code ec) : std::logic_error(ec.message()), ec_(ec) {} - __host__ + THRUST_HOST explicit event_error(event_errc e) : event_error(make_error_code(e)) {} - __host__ + THRUST_HOST error_code const& code() const noexcept { return ec_; } - __host__ + THRUST_HOST virtual ~event_error() noexcept {} private: diff --git a/thrust/detail/execute_with_allocator.h b/thrust/detail/execute_with_allocator.h index 430fe739c..4652be9ef 100644 --- a/thrust/detail/execute_with_allocator.h +++ b/thrust/detail/execute_with_allocator.h @@ -35,7 +35,7 @@ template < , typename Allocator , template class BaseSystem > -__host__ +THRUST_HOST thrust::pair get_temporary_buffer( thrust::detail::execute_with_allocator& system @@ -63,7 +63,7 @@ template < , typename Allocator , template class BaseSystem > -__host__ +THRUST_HOST void return_temporary_buffer( thrust::detail::execute_with_allocator& system @@ -92,7 +92,7 @@ template < typename Allocator, typename ...Dependencies > -__host__ +THRUST_HOST thrust::pair get_temporary_buffer( thrust::detail::execute_with_allocator_and_dependencies& system, @@ -121,7 +121,7 @@ template < typename Allocator, typename ...Dependencies > -__host__ +THRUST_HOST void return_temporary_buffer( thrust::detail::execute_with_allocator_and_dependencies& system, diff --git a/thrust/detail/execute_with_allocator_fwd.h b/thrust/detail/execute_with_allocator_fwd.h index 
1d5899a7d..81cf39d05 100644 --- a/thrust/detail/execute_with_allocator_fwd.h +++ b/thrust/detail/execute_with_allocator_fwd.h @@ -39,13 +39,13 @@ struct execute_with_allocator Allocator alloc; public: - __host__ __device__ + THRUST_HOST_DEVICE execute_with_allocator(super_t const& super, Allocator alloc_) : super_t(super), alloc(alloc_) {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE execute_with_allocator(Allocator alloc_) : alloc(alloc_) {} @@ -54,7 +54,7 @@ struct execute_with_allocator #if THRUST_CPP_DIALECT >= 2011 template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies after(Dependencies&& ...dependencies) const { @@ -62,14 +62,14 @@ struct execute_with_allocator } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies after(std::tuple& dependencies) const { return { alloc, capture_as_dependency(dependencies) }; } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies after(std::tuple&& dependencies) const { @@ -77,7 +77,7 @@ struct execute_with_allocator } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(Dependencies&& ...dependencies) const { @@ -85,14 +85,14 @@ struct execute_with_allocator } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(std::tuple& dependencies) const { return { alloc, capture_as_dependency(dependencies) }; } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(std::tuple&& dependencies) const { diff --git a/thrust/detail/execute_with_dependencies.h b/thrust/detail/execute_with_dependencies.h index b8af85eff..4f6b35919 100644 --- a/thrust/detail/execute_with_dependencies.h +++ b/thrust/detail/execute_with_dependencies.h @@ -60,42 +60,42 @@ struct execute_with_dependencies std::tuple...> dependencies; public: - __host__ + THRUST_HOST execute_with_dependencies(super_t const &super, Dependencies && ...deps) : 
super_t(super), dependencies(std::forward(deps)...) { } template - __host__ + THRUST_HOST execute_with_dependencies(super_t const &super, UDependencies && ...deps) : super_t(super), dependencies(THRUST_FWD(deps)...) { } template - __host__ + THRUST_HOST execute_with_dependencies(UDependencies && ...deps) : dependencies(THRUST_FWD(deps)...) { } template - __host__ + THRUST_HOST execute_with_dependencies(super_t const &super, std::tuple&& deps) : super_t(super), dependencies(std::move(deps)) { } template - __host__ + THRUST_HOST execute_with_dependencies(std::tuple&& deps) : dependencies(std::move(deps)) { } std::tuple...> - __host__ + THRUST_HOST extract_dependencies() { return std::move(dependencies); @@ -103,7 +103,7 @@ struct execute_with_dependencies // Rebinding. template - __host__ + THRUST_HOST execute_with_dependencies rebind_after(UDependencies&& ...udependencies) const { @@ -112,14 +112,14 @@ struct execute_with_dependencies // Rebinding. template - __host__ + THRUST_HOST execute_with_dependencies rebind_after(std::tuple& udependencies) const { return { capture_as_dependency(udependencies) }; } template - __host__ + THRUST_HOST execute_with_dependencies rebind_after(std::tuple&& udependencies) const { @@ -155,41 +155,41 @@ struct execute_with_allocator_and_dependencies public: template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies(super_t const &super, Allocator a, UDependencies && ...deps) : super_t(super), dependencies(THRUST_FWD(deps)...), alloc(a) { } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies(Allocator a, UDependencies && ...deps) : dependencies(THRUST_FWD(deps)...), alloc(a) { } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies(super_t const &super, Allocator a, std::tuple&& deps) : super_t(super), dependencies(std::move(deps)), alloc(a) { } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies(Allocator a, std::tuple&& deps) : dependencies(std::move(deps)), 
alloc(a) { } std::tuple...> - __host__ + THRUST_HOST extract_dependencies() { return std::move(dependencies); } - __host__ + THRUST_HOST typename std::add_lvalue_reference::type get_allocator() { @@ -198,7 +198,7 @@ struct execute_with_allocator_and_dependencies // Rebinding. template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(UDependencies&& ...udependencies) const { @@ -207,14 +207,14 @@ struct execute_with_allocator_and_dependencies // Rebinding. template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(std::tuple& udependencies) const { return { alloc, capture_as_dependency(udependencies) }; } template - __host__ + THRUST_HOST execute_with_allocator_and_dependencies rebind_after(std::tuple&& udependencies) const { @@ -223,14 +223,14 @@ struct execute_with_allocator_and_dependencies }; template class BaseSystem, typename ...Dependencies> -__host__ +THRUST_HOST std::tuple...> extract_dependencies(thrust::detail::execute_with_dependencies&& system) { return std::move(system).extract_dependencies(); } template class BaseSystem, typename ...Dependencies> -__host__ +THRUST_HOST std::tuple...> extract_dependencies(thrust::detail::execute_with_dependencies& system) { @@ -238,14 +238,14 @@ extract_dependencies(thrust::detail::execute_with_dependencies class BaseSystem, typename ...Dependencies> -__host__ +THRUST_HOST std::tuple...> extract_dependencies(thrust::detail::execute_with_allocator_and_dependencies&& system) { return std::move(system).extract_dependencies(); } template class BaseSystem, typename ...Dependencies> -__host__ +THRUST_HOST std::tuple...> extract_dependencies(thrust::detail::execute_with_allocator_and_dependencies& system) { @@ -253,7 +253,7 @@ extract_dependencies(thrust::detail::execute_with_allocator_and_dependencies -__host__ +THRUST_HOST std::tuple<> extract_dependencies(System &&) { diff --git a/thrust/detail/execution_policy.h b/thrust/detail/execution_policy.h index 
dcc11a770..a9bba0a60 100644 --- a/thrust/detail/execution_policy.h +++ b/thrust/detail/execution_policy.h @@ -44,7 +44,7 @@ struct execution_policy_base : execution_policy_marker {}; template -constexpr __host__ __device__ +constexpr THRUST_HOST_DEVICE execution_policy_base &strip_const(const execution_policy_base &x) { return const_cast&>(x); @@ -52,7 +52,7 @@ execution_policy_base &strip_const(const execution_policy_base -constexpr __host__ __device__ +constexpr THRUST_HOST_DEVICE DerivedPolicy &derived_cast(execution_policy_base &x) { return static_cast(x); @@ -60,7 +60,7 @@ DerivedPolicy &derived_cast(execution_policy_base &x) template -constexpr __host__ __device__ +constexpr THRUST_HOST_DEVICE const DerivedPolicy &derived_cast(const execution_policy_base &x) { return static_cast(x); diff --git a/thrust/detail/extrema.inl b/thrust/detail/extrema.inl index 2c1750e7d..bb24c22c5 100644 --- a/thrust/detail/extrema.inl +++ b/thrust/detail/extrema.inl @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) { using thrust::system::detail::generic::min_element; @@ -35,9 +35,9 @@ ForwardIterator min_element(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { using thrust::system::detail::generic::min_element; @@ -45,9 +45,9 @@ ForwardIterator min_element(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) { using thrust::system::detail::generic::max_element; @@ -55,9 +55,9 @@ ForwardIterator max_element(const 
thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { using thrust::system::detail::generic::max_element; @@ -65,9 +65,9 @@ ForwardIterator max_element(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) { using thrust::system::detail::generic::minmax_element; @@ -75,9 +75,9 @@ thrust::pair minmax_element(const thrust::detai } // end minmax_element() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) { using thrust::system::detail::generic::minmax_element; diff --git a/thrust/detail/fill.inl b/thrust/detail/fill.inl index e68672bbe..3d5dab60e 100644 --- a/thrust/detail/fill.inl +++ b/thrust/detail/fill.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void fill(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -39,9 +39,9 @@ __host__ __device__ } // end fill() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, OutputIterator first, Size n, @@ -53,7 +53,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void fill(ForwardIterator first, ForwardIterator last, const T &value) @@ -69,7 +69,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator fill_n(OutputIterator first, Size n, const T &value) diff --git 
a/thrust/detail/find.inl b/thrust/detail/find.inl index 5b494f61a..bb8600800 100644 --- a/thrust/detail/find.inl +++ b/thrust/detail/find.inl @@ -24,9 +24,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -37,9 +37,9 @@ InputIterator find(const thrust::detail::execution_policy_base &e } // end find() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -50,9 +50,9 @@ InputIterator find_if(const thrust::detail::execution_policy_base } // end find_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, diff --git a/thrust/detail/for_each.inl b/thrust/detail/for_each.inl index 4ba39c71a..4999032ce 100644 --- a/thrust/detail/for_each.inl +++ b/thrust/detail/for_each.inl @@ -25,11 +25,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -54,9 +54,9 @@ InputIterator for_each(InputIterator first, return thrust::for_each(select_system(system), first, last, f); } // end for_each() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, diff --git a/thrust/detail/function.h b/thrust/detail/function.h index fa80d5803..c57ac3d68 100644 --- a/thrust/detail/function.h +++ 
b/thrust/detail/function.h @@ -30,62 +30,62 @@ template // mutable because Function::operator() might be const mutable Function m_f; - inline __host__ __device__ + inline THRUST_HOST_DEVICE wrapped_function() : m_f() {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE wrapped_function(const Function& f) : m_f(f) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(Argument& x) const { return static_cast(m_f(thrust::raw_reference_cast(x))); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(const Argument& x) const { return static_cast(m_f(thrust::raw_reference_cast(x))); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(Argument1& x, Argument2& y) const { return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(const Argument1& x, Argument2& y) const { return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(const Argument1& x, const Argument2& y) const { return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE Result operator()(Argument1& x, const Argument2& y) const { return 
static_cast(m_f(thrust::raw_reference_cast(x), @@ -99,57 +99,57 @@ struct wrapped_function { // mutable because Function::operator() might be const mutable Function m_f; - inline __host__ __device__ + inline THRUST_HOST_DEVICE wrapped_function() : m_f() {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE wrapped_function(const Function& f) : m_f(f) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(Argument& x) const { m_f(thrust::raw_reference_cast(x)); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(const Argument& x) const { m_f(thrust::raw_reference_cast(x)); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(Argument1& x, Argument2& y) const { m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(const Argument1& x, Argument2& y) const { m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(const Argument1& x, const Argument2& y) const { m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __thrust_forceinline__ __host__ __device__ + THRUST_FORCEINLINE THRUST_HOST_DEVICE void operator()(Argument1& x, const Argument2& y) const { m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y)); diff --git a/thrust/detail/functional.inl 
b/thrust/detail/functional.inl index bdf8e0415..e7dcc0435 100644 --- a/thrust/detail/functional.inl +++ b/thrust/detail/functional.inl @@ -110,14 +110,14 @@ template }; // end binary_traits template - __host__ __device__ + THRUST_HOST_DEVICE unary_negate not1(const Predicate &pred) { return unary_negate(pred); } // end not1() template - __host__ __device__ + THRUST_HOST_DEVICE binary_negate not2(const BinaryPredicate &pred) { return binary_negate(pred); diff --git a/thrust/detail/functional/actor.h b/thrust/detail/functional/actor.h index 208a87061..b40a3c25f 100644 --- a/thrust/detail/functional/actor.h +++ b/thrust/detail/functional/actor.h @@ -59,19 +59,19 @@ template { typedef Eval eval_type; - __host__ __device__ + THRUST_HOST_DEVICE constexpr actor(); - __host__ __device__ + THRUST_HOST_DEVICE actor(const Eval &base); template - __host__ __device__ + THRUST_HOST_DEVICE typename apply_actor...>>::type operator()(Ts&&... ts) const; template - __host__ __device__ + THRUST_HOST_DEVICE typename assign_result::type operator=(const T &_1) const; }; // end actor @@ -82,7 +82,7 @@ template { typedef value type; - static inline __host__ __device__ type convert(const T &x) + static inline THRUST_HOST_DEVICE type convert(const T &x) { return val(x); } // end convert() @@ -94,7 +94,7 @@ template { typedef actor type; - static inline __host__ __device__ const type &convert(const actor &x) + static inline THRUST_HOST_DEVICE const type &convert(const actor &x) { return x; } // end convert() @@ -102,7 +102,7 @@ template template typename as_actor::type - __host__ __device__ + THRUST_HOST_DEVICE make_actor(const T &x) { return as_actor::convert(x); diff --git a/thrust/detail/functional/actor.inl b/thrust/detail/functional/actor.inl index 483c07022..29548ad4d 100644 --- a/thrust/detail/functional/actor.inl +++ b/thrust/detail/functional/actor.inl @@ -42,14 +42,14 @@ namespace functional { template - __host__ __device__ + THRUST_HOST_DEVICE constexpr actor ::actor() : 
eval_type() {} template - __host__ __device__ + THRUST_HOST_DEVICE actor ::actor(const Eval &base) : eval_type(base) @@ -84,7 +84,7 @@ using actor_check_ref_types = template template -__host__ __device__ +THRUST_HOST_DEVICE typename apply_actor::eval_type, thrust::tuple...>>::type actor::operator()(Ts&&... ts) const @@ -98,7 +98,7 @@ actor::operator()(Ts&&... ts) const template template - __host__ __device__ + THRUST_HOST_DEVICE typename assign_result::type actor ::operator=(const T& _1) const diff --git a/thrust/detail/functional/argument.h b/thrust/detail/functional/argument.h index af5c3202a..b7c1d3af3 100644 --- a/thrust/detail/functional/argument.h +++ b/thrust/detail/functional/argument.h @@ -57,11 +57,11 @@ template { }; - __host__ __device__ + THRUST_HOST_DEVICE constexpr argument(){} template - __host__ __device__ + THRUST_HOST_DEVICE typename result::type eval(const Env &e) const { return thrust::get(e); diff --git a/thrust/detail/functional/composite.h b/thrust/detail/functional/composite.h index 40466b3b8..14e142447 100644 --- a/thrust/detail/functional/composite.h +++ b/thrust/detail/functional/composite.h @@ -53,14 +53,14 @@ template >::type type; }; - __host__ __device__ + THRUST_HOST_DEVICE composite(const Eval0 &e0, const Eval1 &e1) : m_eval0(e0), m_eval1(e1) {} template - __host__ __device__ + THRUST_HOST_DEVICE typename result::type eval(const Env &x) const { @@ -88,7 +88,7 @@ template >::type type; }; - __host__ __device__ + THRUST_HOST_DEVICE composite(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2) : m_eval0(e0), m_eval1(e1), @@ -96,7 +96,7 @@ template {} template - __host__ __device__ + THRUST_HOST_DEVICE typename result::type eval(const Env &x) const { @@ -112,14 +112,14 @@ template }; // end composite template -__host__ __device__ +THRUST_HOST_DEVICE actor > compose(const Eval0 &e0, const Eval1 &e1) { return actor >(composite(e0,e1)); } template -__host__ __device__ +THRUST_HOST_DEVICE actor > compose(const Eval0 &e0, const Eval1 &e1, 
const Eval2 &e2) { return actor >(composite(e0,e1,e2)); diff --git a/thrust/detail/functional/operators/arithmetic_operators.h b/thrust/detail/functional/operators/arithmetic_operators.h index 85adf433a..8f8783969 100644 --- a/thrust/detail/functional/operators/arithmetic_operators.h +++ b/thrust/detail/functional/operators/arithmetic_operators.h @@ -29,14 +29,14 @@ namespace functional { template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator>, actor > > -__host__ __device__ +THRUST_HOST_DEVICE operator-(const actor &_1) { return compose(transparent_unary_operator>(), _1); @@ -47,9 +47,9 @@ struct unary_plus { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(+THRUST_FWD(t1))) THRUST_TRAILING_RETURN(decltype(+THRUST_FWD(t1))) @@ -59,7 +59,7 @@ struct unary_plus }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, @@ -72,7 +72,7 @@ operator+(const actor &_1) } // end operator+() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -88,7 +88,7 @@ operator+(const actor &_1, const T2 &_2) } // end operator+() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -104,7 +104,7 @@ operator+(const T1 &_1, const actor &_2) } // end operator+() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -120,7 +120,7 @@ operator+(const actor &_1, const actor &_2) } // end operator+() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -136,7 +136,7 @@ operator-(const T1 &_1, const actor &_2) } // end operator-() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -152,7 +152,7 @@ operator-(const actor &_1, const 
T2 &_2) } // end operator-() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -168,7 +168,7 @@ operator-(const actor &_1, const actor &_2) } // end operator-() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -184,7 +184,7 @@ operator*(const T1 &_1, const actor &_2) } // end operator*() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -200,7 +200,7 @@ operator*(const actor &_1, const T2 &_2) } // end operator*() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -216,7 +216,7 @@ operator*(const actor &_1, const actor &_2) } // end operator*() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -232,7 +232,7 @@ operator/(const actor &_1, const T2 &_2) } // end operator/() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -248,7 +248,7 @@ operator/(const T1 &_1, const actor &_2) } // end operator/() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -264,7 +264,7 @@ operator/(const actor &_1, const actor &_2) } // end operator/() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -280,7 +280,7 @@ operator%(const actor &_1, const T2 &_2) } // end operator%() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -296,7 +296,7 @@ operator%(const T1 &_1, const actor &_2) } // end operator%() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -316,9 +316,9 @@ struct prefix_increment { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const 
noexcept(noexcept(++THRUST_FWD(t1))) THRUST_TRAILING_RETURN(decltype(++THRUST_FWD(t1))) @@ -328,7 +328,7 @@ struct prefix_increment }; // end prefix_increment template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, @@ -346,9 +346,9 @@ struct postfix_increment { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(THRUST_FWD(t1)++)) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1)++)) @@ -358,7 +358,7 @@ struct postfix_increment }; // end postfix_increment template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, @@ -376,9 +376,9 @@ struct prefix_decrement { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(--THRUST_FWD(t1))) THRUST_TRAILING_RETURN(decltype(--THRUST_FWD(t1))) @@ -388,7 +388,7 @@ struct prefix_decrement }; // end prefix_decrement template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, @@ -406,9 +406,9 @@ struct postfix_decrement { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(THRUST_FWD(t1)--)) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1)--)) @@ -418,7 +418,7 @@ struct postfix_decrement }; // end prefix_increment template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, diff --git a/thrust/detail/functional/operators/assignment_operator.h b/thrust/detail/functional/operators/assignment_operator.h index 5f65b8e54..22f446b84 100644 --- a/thrust/detail/functional/operators/assignment_operator.h +++ b/thrust/detail/functional/operators/assignment_operator.h 
@@ -40,9 +40,9 @@ struct assign { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) = THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) = THRUST_FWD(t2))) @@ -64,7 +64,7 @@ template }; // end assign_result template - __host__ __device__ + THRUST_HOST_DEVICE typename assign_result::type do_assign(const actor &_1, const T &_2) { diff --git a/thrust/detail/functional/operators/bitwise_operators.h b/thrust/detail/functional/operators/bitwise_operators.h index a12dbacd5..843304585 100644 --- a/thrust/detail/functional/operators/bitwise_operators.h +++ b/thrust/detail/functional/operators/bitwise_operators.h @@ -29,7 +29,7 @@ namespace functional { template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -45,7 +45,7 @@ operator&(const actor &_1, const T2 &_2) } // end operator&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -61,7 +61,7 @@ operator&(const T1 &_1, const actor &_2) } // end operator&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -77,7 +77,7 @@ operator&(const actor &_1, const actor &_2) } // end operator&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -93,7 +93,7 @@ operator|(const actor &_1, const T2 &_2) } // end operator|() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -109,7 +109,7 @@ operator|(const T1 &_1, const actor &_2) } // end operator|() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -125,7 +125,7 @@ operator|(const actor &_1, const actor &_2) } // end operator|() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< 
transparent_binary_operator>, @@ -141,7 +141,7 @@ operator^(const actor &_1, const T2 &_2) } // end operator^() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -157,7 +157,7 @@ operator^(const T1 &_1, const actor &_2) } // end operator^() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -178,9 +178,9 @@ struct bit_not { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(~THRUST_FWD(t1))) THRUST_TRAILING_RETURN(decltype(~THRUST_FWD(t1))) @@ -190,14 +190,14 @@ struct bit_not }; // end prefix_increment template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator, actor > > -__host__ __device__ +THRUST_HOST_DEVICE operator~(const actor &_1) { return compose(transparent_unary_operator(), _1); @@ -208,9 +208,9 @@ struct bit_lshift { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) << THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) << THRUST_FWD(t2))) @@ -220,7 +220,7 @@ struct bit_lshift }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -236,7 +236,7 @@ operator<<(const actor &_1, const T2 &_2) } // end operator<<() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -252,7 +252,7 @@ operator<<(const T1 &_1, const actor &_2) } // end operator<<() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -272,9 +272,9 @@ struct bit_rshift { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + 
THRUST_HOST_DEVICE constexpr auto operator()(T1& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) >> THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) >> THRUST_FWD(t2))) @@ -285,7 +285,7 @@ struct bit_rshift template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -301,7 +301,7 @@ operator>>(const actor &_1, const T2 &_2) } // end operator>>() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -317,7 +317,7 @@ operator>>(const T1 &_1, const actor &_2) } // end operator>>() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, diff --git a/thrust/detail/functional/operators/compound_assignment_operators.h b/thrust/detail/functional/operators/compound_assignment_operators.h index 4697c0614..a992911d1 100644 --- a/thrust/detail/functional/operators/compound_assignment_operators.h +++ b/thrust/detail/functional/operators/compound_assignment_operators.h @@ -32,9 +32,9 @@ struct plus_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) += THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) += THRUST_FWD(t2))) @@ -44,7 +44,7 @@ struct plus_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -60,7 +60,7 @@ operator+=(const actor &_1, const T2 &_2) } // end operator+=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -80,9 +80,9 @@ struct minus_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) -= THRUST_FWD(t2))) 
THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) -= THRUST_FWD(t2))) @@ -92,7 +92,7 @@ struct minus_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -108,7 +108,7 @@ operator-=(const actor &_1, const T2 &_2) } // end operator-=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -128,9 +128,9 @@ struct multiplies_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) *= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) *= THRUST_FWD(t2))) @@ -140,7 +140,7 @@ struct multiplies_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -156,7 +156,7 @@ operator*=(const actor &_1, const T2 &_2) } // end operator*=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -176,9 +176,9 @@ struct divides_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) /= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) /= THRUST_FWD(t2))) @@ -188,7 +188,7 @@ struct divides_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -204,7 +204,7 @@ operator/=(const actor &_1, const T2 &_2) } // end operator/=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -224,9 +224,9 @@ struct modulus_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const 
noexcept(noexcept(THRUST_FWD(t1) %= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) %= THRUST_FWD(t2))) @@ -236,7 +236,7 @@ struct modulus_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -252,7 +252,7 @@ operator%=(const actor &_1, const T2 &_2) } // end operator%=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -272,9 +272,9 @@ struct bit_and_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) &= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) &= THRUST_FWD(t2))) @@ -284,7 +284,7 @@ struct bit_and_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -300,7 +300,7 @@ operator&=(const actor &_1, const T2 &_2) } // end operator&=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -320,9 +320,9 @@ struct bit_or_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) |= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) |= THRUST_FWD(t2))) @@ -332,7 +332,7 @@ struct bit_or_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -348,7 +348,7 @@ operator|=(const actor &_1, const T2 &_2) } // end operator|=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -368,9 +368,9 @@ struct bit_xor_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto 
operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) ^= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) ^= THRUST_FWD(t2))) @@ -380,7 +380,7 @@ struct bit_xor_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -396,7 +396,7 @@ operator^=(const actor &_1, const T2 &_2) } // end operator|=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -416,9 +416,9 @@ struct bit_lshift_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) <<= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) <<= THRUST_FWD(t2))) @@ -427,7 +427,7 @@ struct bit_lshift_equal } }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -443,7 +443,7 @@ operator<<=(const actor &_1, const T2 &_2) } // end operator<<=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -463,9 +463,9 @@ struct bit_rshift_equal { using is_transparent = void; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&& t2) const noexcept(noexcept(THRUST_FWD(t1) >>= THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1) >>= THRUST_FWD(t2))) @@ -475,7 +475,7 @@ struct bit_rshift_equal }; template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, @@ -491,7 +491,7 @@ operator>>=(const actor &_1, const T2 &_2) } // end operator>>=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator, diff --git a/thrust/detail/functional/operators/logical_operators.h b/thrust/detail/functional/operators/logical_operators.h index 
78989ec41..cc2a251b2 100644 --- a/thrust/detail/functional/operators/logical_operators.h +++ b/thrust/detail/functional/operators/logical_operators.h @@ -29,7 +29,7 @@ namespace functional { template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -45,7 +45,7 @@ operator&&(const actor &_1, const T2 &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -61,7 +61,7 @@ operator&&(const T1 &_1, const actor &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -77,7 +77,7 @@ operator&&(const actor &_1, const actor &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -93,7 +93,7 @@ operator||(const actor &_1, const T2 &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -109,7 +109,7 @@ operator||(const T1 &_1, const actor &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -125,7 +125,7 @@ operator||(const actor &_1, const actor &_2) } // end operator&&() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_unary_operator>, diff --git a/thrust/detail/functional/operators/operator_adaptors.h b/thrust/detail/functional/operators/operator_adaptors.h index 13cbcb72b..c83489cb1 100644 --- a/thrust/detail/functional/operators/operator_adaptors.h +++ b/thrust/detail/functional/operators/operator_adaptors.h @@ -70,7 +70,7 @@ struct transparent_unary_operator }; template - __host__ __device__ + THRUST_HOST_DEVICE result_type eval(Env&& e) const THRUST_RETURNS(UnaryFunctor{}(thrust::get<0>(THRUST_FWD(e)))) }; @@ -125,7 +125,7 @@ struct transparent_binary_operator }; template - __host__ __device__ + THRUST_HOST_DEVICE result_type eval(Env&& e) 
const THRUST_RETURNS(BinaryFunctor{}(thrust::get<0>(e), thrust::get<1>(e))) }; diff --git a/thrust/detail/functional/operators/relational_operators.h b/thrust/detail/functional/operators/relational_operators.h index 2bd96b477..7018d29da 100644 --- a/thrust/detail/functional/operators/relational_operators.h +++ b/thrust/detail/functional/operators/relational_operators.h @@ -29,7 +29,7 @@ namespace functional { template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -45,7 +45,7 @@ operator==(const actor &_1, const T2 &_2) } // end operator==() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -61,7 +61,7 @@ operator==(const T1 &_1, const actor &_2) } // end operator==() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -77,7 +77,7 @@ operator==(const actor &_1, const actor &_2) } // end operator==() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -93,7 +93,7 @@ operator!=(const actor &_1, const T2 &_2) } // end operator!=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -109,7 +109,7 @@ operator!=(const T1 &_1, const actor &_2) } // end operator!=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -125,7 +125,7 @@ operator!=(const actor &_1, const actor &_2) } // end operator!=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -141,7 +141,7 @@ operator>(const actor &_1, const T2 &_2) } // end operator>() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -157,7 +157,7 @@ operator>(const T1 &_1, const actor &_2) } // end operator>() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -173,7 +173,7 @@ operator>(const 
actor &_1, const actor &_2) } // end operator>() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -189,7 +189,7 @@ operator<(const actor &_1, const T2 &_2) } // end operator<() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -205,7 +205,7 @@ operator<(const T1 &_1, const actor &_2) } // end operator<() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -221,7 +221,7 @@ operator<(const actor &_1, const actor &_2) } // end operator<() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -237,7 +237,7 @@ operator>=(const actor &_1, const T2 &_2) } // end operator>=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -253,7 +253,7 @@ operator>=(const T1 &_1, const actor &_2) } // end operator>=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -269,7 +269,7 @@ operator>=(const actor &_1, const actor &_2) } // end operator>=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -285,7 +285,7 @@ operator<=(const actor &_1, const T2 &_2) } // end operator<=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, @@ -301,7 +301,7 @@ operator<=(const T1 &_1, const actor &_2) } // end operator<=() template -__host__ __device__ +THRUST_HOST_DEVICE actor< composite< transparent_binary_operator>, diff --git a/thrust/detail/functional/value.h b/thrust/detail/functional/value.h index d6b1563b1..eddc02a9d 100644 --- a/thrust/detail/functional/value.h +++ b/thrust/detail/functional/value.h @@ -49,13 +49,13 @@ template typedef T type; }; - __host__ __device__ + THRUST_HOST_DEVICE value(const T &arg) : m_val(arg) {} template - __host__ __device__ + THRUST_HOST_DEVICE T eval(const Env &) 
const { return m_val; @@ -66,7 +66,7 @@ template }; // end value template -__host__ __device__ +THRUST_HOST_DEVICE actor > val(const T &x) { return value(x); diff --git a/thrust/detail/gather.inl b/thrust/detail/gather.inl index 3812702f6..f5e81e859 100644 --- a/thrust/detail/gather.inl +++ b/thrust/detail/gather.inl @@ -26,12 +26,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather(const thrust::detail::execution_policy_base &exec, InputIterator map_first, InputIterator map_last, @@ -43,13 +43,13 @@ __host__ __device__ } // end gather() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, InputIterator1 map_first, InputIterator1 map_last, @@ -62,14 +62,14 @@ __host__ __device__ } // end gather_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, InputIterator1 map_first, InputIterator1 map_last, diff --git a/thrust/detail/generate.inl b/thrust/detail/generate.inl index 2ecb65d58..78492aed5 100644 --- a/thrust/detail/generate.inl +++ b/thrust/detail/generate.inl @@ -26,11 +26,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void generate(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -41,12 +41,12 @@ __host__ __device__ } // end generate() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, OutputIterator first, Size n, diff --git a/thrust/detail/get_iterator_value.h b/thrust/detail/get_iterator_value.h index 27e0a4e47..6e08216d3 
100644 --- a/thrust/detail/get_iterator_value.h +++ b/thrust/detail/get_iterator_value.h @@ -29,7 +29,7 @@ namespace detail { // -------------------------------------------------- // it is okay to dereference iterator in the usual way template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::value_type get_iterator_value(thrust::execution_policy &, Iterator it) { @@ -43,7 +43,7 @@ get_iterator_value(thrust::execution_policy &, Iterator it) // we use get_value(exec,pointer*) function // to perform a dereferencing consistent with the execution policy template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::pointer_traits::element_type get_iterator_value(thrust::execution_policy &exec, Pointer* ptr) { diff --git a/thrust/detail/inner_product.inl b/thrust/detail/inner_product.inl index 97cd2b0b5..b8e5b4812 100644 --- a/thrust/detail/inner_product.inl +++ b/thrust/detail/inner_product.inl @@ -25,12 +25,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -42,14 +42,14 @@ OutputType inner_product(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/detail/integer_math.h b/thrust/detail/integer_math.h index af5960a85..e4a5a3903 100644 --- a/thrust/detail/integer_math.h +++ b/thrust/detail/integer_math.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ namespace detail { template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE Integer clz(Integer x) { Integer result; @@ -52,21 +52,21 @@ Integer clz(Integer x) } template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE bool is_power_of_2(Integer x) { return 0 == (x & (x - 1)); } template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE bool is_odd(Integer x) { return 1 & x; } template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE Integer log2(Integer x) { Integer num_bits = 8 * sizeof(Integer); @@ -77,7 +77,7 @@ Integer log2(Integer x) template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE Integer log2_ri(Integer x) { Integer result = log2(x); @@ -92,7 +92,7 @@ Integer log2_ri(Integer x) // x/y rounding towards +infinity for integers // Used to determine # of blocks/warps etc. template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE #if THRUST_CPP_DIALECT >= 2011 // FIXME: Should use common_type. auto divide_ri(Integer0 const x, Integer1 const y) @@ -108,7 +108,7 @@ Integer0 divide_ri(Integer0 const x, Integer1 const y) // x/y rounding towards zero for integers. // Used to determine # of blocks/warps etc. template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE #if THRUST_CPP_DIALECT >= 2011 auto divide_rz(Integer0 const x, Integer1 const y) THRUST_DECLTYPE_RETURNS(x / y) @@ -122,7 +122,7 @@ Integer0 divide_rz(Integer0 const x, Integer1 const y) // Round x towards infinity to the next multiple of y. 
template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE #if THRUST_CPP_DIALECT >= 2011 auto round_i(Integer0 const x, Integer1 const y) THRUST_DECLTYPE_RETURNS(y * divide_ri(x, y)) @@ -135,7 +135,7 @@ Integer0 round_i(Integer0 const x, Integer1 const y) // Round x towards 0 to the next multiple of y. template -__host__ __device__ __thrust_forceinline__ +THRUST_HOST_DEVICE THRUST_FORCEINLINE #if THRUST_CPP_DIALECT >= 2011 auto round_z(Integer0 const x, Integer1 const y) THRUST_DECLTYPE_RETURNS(y * divide_rz(x, y)) diff --git a/thrust/detail/internal_functional.h b/thrust/detail/internal_functional.h index 7a57f2c76..492423ba7 100644 --- a/thrust/detail/internal_functional.h +++ b/thrust/detail/internal_functional.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -44,11 +44,11 @@ struct unary_negate Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE explicit unary_negate(const Predicate& pred) : pred(pred) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T& x) { return !bool(pred(x)); @@ -63,11 +63,11 @@ struct binary_negate Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE explicit binary_negate(const Predicate& pred) : pred(pred) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T1& x, const T2& y) { return !bool(pred(x,y)); @@ -75,14 +75,14 @@ struct binary_negate }; template -__host__ __device__ +THRUST_HOST_DEVICE thrust::detail::unary_negate not1(const Predicate &pred) { return thrust::detail::unary_negate(pred); } template -__host__ __device__ +THRUST_HOST_DEVICE thrust::detail::binary_negate not2(const Predicate &pred) { return thrust::detail::binary_negate(pred); @@ -95,11 +95,11 @@ struct predicate_to_integral { Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE explicit predicate_to_integral(const Predicate& pred) : pred(pred) {} template - __host__ __device__ + THRUST_HOST_DEVICE IntegralType operator()(const T& x) { return pred(x) ? 
IntegralType(1) : IntegralType(0); @@ -114,7 +114,7 @@ struct equal_to typedef bool result_type; template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T1& lhs, const T2& rhs) const { return lhs == rhs; @@ -127,11 +127,11 @@ struct equal_to_value { T2 rhs; - __host__ __device__ + THRUST_HOST_DEVICE equal_to_value(const T2& rhs) : rhs(rhs) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T1& lhs) const { return lhs == rhs; @@ -143,11 +143,11 @@ struct tuple_binary_predicate { typedef bool result_type; - __host__ __device__ + THRUST_HOST_DEVICE tuple_binary_predicate(const Predicate& p) : pred(p) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const Tuple& t) const { return pred(thrust::get<0>(t), thrust::get<1>(t)); @@ -161,11 +161,11 @@ struct tuple_not_binary_predicate { typedef bool result_type; - __host__ __device__ + THRUST_HOST_DEVICE tuple_not_binary_predicate(const Predicate& p) : pred(p) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const Tuple& t) const { return !pred(thrust::get<0>(t), thrust::get<1>(t)); @@ -179,8 +179,8 @@ template { typedef void result_type; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE host_generate_functor(Generator g) : gen(g) {} @@ -195,7 +195,7 @@ template // XXX change to an rvalue reference upon c++0x (which either a named variable // or temporary can bind to) template - __host__ + THRUST_HOST void operator()(const T &x) { // we have to be naughty and const_cast this to get it to work @@ -213,8 +213,8 @@ template { typedef void result_type; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE device_generate_functor(Generator g) : gen(g) {} @@ -229,7 +229,7 @@ template // XXX change to an rvalue reference upon c++0x (which either a named variable // or temporary can bind to) template - __host__ __device__ + THRUST_HOST_DEVICE void 
operator()(const T &x) { // we have to be naughty and const_cast this to get it to work @@ -257,12 +257,12 @@ template { typedef ResultType result_type; - __host__ __device__ + THRUST_HOST_DEVICE zipped_binary_op(BinaryFunction binary_op) : m_binary_op(binary_op) {} template - __host__ __device__ + THRUST_HOST_DEVICE inline result_type operator()(Tuple t) { return m_binary_op(thrust::get<0>(t), thrust::get<1>(t)); @@ -309,14 +309,14 @@ template UnaryFunction f; - __host__ __device__ + THRUST_HOST_DEVICE unary_transform_functor(UnaryFunction f) : f(f) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if_non_const_reference_or_tuple_of_iterator_references< typename thrust::tuple_element<1,Tuple>::type >::type @@ -332,14 +332,14 @@ template { BinaryFunction f; - __host__ __device__ + THRUST_HOST_DEVICE binary_transform_functor(BinaryFunction f) : f(f) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if_non_const_reference_or_tuple_of_iterator_references< typename thrust::tuple_element<2,Tuple>::type >::type @@ -356,14 +356,14 @@ struct unary_transform_if_functor UnaryFunction unary_op; Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE unary_transform_if_functor(UnaryFunction unary_op, Predicate pred) : unary_op(unary_op), pred(pred) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if_non_const_reference_or_tuple_of_iterator_references< typename thrust::tuple_element<1,Tuple>::type >::type @@ -383,14 +383,14 @@ struct unary_transform_if_with_stencil_functor UnaryFunction unary_op; Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE unary_transform_if_with_stencil_functor(UnaryFunction unary_op, Predicate pred) : unary_op(unary_op), pred(pred) {} - 
__thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if_non_const_reference_or_tuple_of_iterator_references< typename thrust::tuple_element<2,Tuple>::type >::type @@ -408,13 +408,13 @@ struct binary_transform_if_functor BinaryFunction binary_op; Predicate pred; - __host__ __device__ + THRUST_HOST_DEVICE binary_transform_if_functor(BinaryFunction binary_op, Predicate pred) : binary_op(binary_op), pred(pred) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename enable_if_non_const_reference_or_tuple_of_iterator_references< typename thrust::tuple_element<3,Tuple>::type >::type @@ -429,7 +429,7 @@ struct binary_transform_if_functor template struct host_destroy_functor { - __host__ + THRUST_HOST void operator()(T &x) const { x.~T(); @@ -441,7 +441,7 @@ template struct device_destroy_functor { // add __host__ to allow the omp backend to compile with nvcc - __host__ __device__ + THRUST_HOST_DEVICE void operator()(T &x) const { x.~T(); @@ -464,22 +464,22 @@ struct fill_functor { T exemplar; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE fill_functor(const T& _exemplar) : exemplar(_exemplar) {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE fill_functor(const fill_functor & other) :exemplar(other.exemplar){} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE ~fill_functor() {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE T operator()(void) const { return exemplar; @@ -492,21 +492,21 @@ template { T exemplar; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE uninitialized_fill_functor(const T & x):exemplar(x){} - 
__thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE uninitialized_fill_functor(const uninitialized_fill_functor & other) :exemplar(other.exemplar){} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE ~uninitialized_fill_functor() {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void operator()(T &x) { ::new(static_cast(&x)) T(exemplar); @@ -525,7 +525,7 @@ template : comp(c) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T1 lhs, T2 rhs) { return comp(thrust::get<0>(lhs), thrust::get<0>(rhs)) || (!comp(thrust::get<0>(rhs), thrust::get<0>(lhs)) && thrust::get<1>(lhs) < thrust::get<1>(rhs)); @@ -540,13 +540,13 @@ template { Compare comp; - __host__ __device__ + THRUST_HOST_DEVICE compare_first(Compare comp) : comp(comp) {} template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const Tuple1 &x, const Tuple2 &y) { return comp(thrust::raw_reference_cast(thrust::get<0>(x)), thrust::raw_reference_cast(thrust::get<0>(y))); diff --git a/thrust/detail/logical.inl b/thrust/detail/logical.inl index 3d39cac92..e6c4061dc 100644 --- a/thrust/detail/logical.inl +++ b/thrust/detail/logical.inl @@ -24,9 +24,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) { using thrust::system::detail::generic::all_of; @@ -34,9 +34,9 @@ bool all_of(const thrust::detail::execution_policy_base &exec, In } // end all_of() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) { using thrust::system::detail::generic::any_of; @@ -44,9 +44,9 
@@ bool any_of(const thrust::detail::execution_policy_base &exec, In } // end any_of() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) { using thrust::system::detail::generic::none_of; diff --git a/thrust/detail/malloc_and_free.h b/thrust/detail/malloc_and_free.h index 143518893..a84bc3763 100644 --- a/thrust/detail/malloc_and_free.h +++ b/thrust/detail/malloc_and_free.h @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) { using thrust::system::detail::generic::malloc; @@ -38,9 +38,9 @@ pointer malloc(const thrust::detail::execution_policy_base(raw_ptr); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) { using thrust::system::detail::generic::malloc; @@ -58,7 +58,7 @@ pointer malloc(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE void free(const thrust::detail::execution_policy_base &exec, Pointer ptr) { using thrust::system::detail::generic::free; diff --git a/thrust/detail/memory_algorithms.h b/thrust/detail/memory_algorithms.h index 78b999e85..93406b7b9 100644 --- a/thrust/detail/memory_algorithms.h +++ b/thrust/detail/memory_algorithms.h @@ -1,5 +1,5 @@ // Copyright (c) 2018 NVIDIA Corporation -// Modifications Copyright© 2023 Advanced Micro Devices, Inc. All rights reserved. +// Modifications Copyright© 2023-2024 Advanced Micro Devices, Inc. All rights reserved. 
// Author: Bryce Adelstein Lelbach // // Distributed under the Boost Software License v1.0 (boost.org/LICENSE_1_0.txt) @@ -27,14 +27,14 @@ THRUST_NAMESPACE_BEGIN /////////////////////////////////////////////////////////////////////////////// template -__host__ __device__ +THRUST_HOST_DEVICE void destroy_at(T* location) { location->~T(); } template -__host__ __device__ +THRUST_HOST_DEVICE void destroy_at(Allocator const& alloc, T* location) { typedef typename detail::allocator_traits< @@ -49,7 +49,7 @@ void destroy_at(Allocator const& alloc, T* location) } template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIt destroy(ForwardIt first, ForwardIt last) { for (; first != last; ++first) @@ -59,7 +59,7 @@ ForwardIt destroy(ForwardIt first, ForwardIt last) } template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIt destroy(Allocator const& alloc, ForwardIt first, ForwardIt last) { typedef typename iterator_traits::value_type T; @@ -78,7 +78,7 @@ ForwardIt destroy(Allocator const& alloc, ForwardIt first, ForwardIt last) } template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIt destroy_n(ForwardIt first, Size n) { for (; n > 0; (void) ++first, --n) @@ -88,7 +88,7 @@ ForwardIt destroy_n(ForwardIt first, Size n) } template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIt destroy_n(Allocator const& alloc, ForwardIt first, Size n) { typedef typename iterator_traits::value_type T; @@ -107,7 +107,7 @@ ForwardIt destroy_n(Allocator const& alloc, ForwardIt first, Size n) } template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_construct( ForwardIt first, ForwardIt last, Args const&... 
args ) diff --git a/thrust/detail/merge.inl b/thrust/detail/merge.inl index 1595cc1a1..c2fe0cc3c 100644 --- a/thrust/detail/merge.inl +++ b/thrust/detail/merge.inl @@ -26,12 +26,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -44,13 +44,13 @@ __host__ __device__ } // end merge() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -64,9 +64,9 @@ __host__ __device__ } // end merge() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, @@ -80,9 +80,9 @@ __host__ __device__ } // end merge_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, diff --git a/thrust/detail/minmax.h b/thrust/detail/minmax.h index c565a74bd..f4ba7793c 100644 --- a/thrust/detail/minmax.h +++ b/thrust/detail/minmax.h @@ -21,28 +21,28 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) { return comp(rhs, lhs) ? rhs : lhs; } // end min() template -__host__ __device__ +THRUST_HOST_DEVICE T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) { return rhs < lhs ? 
rhs : lhs; } // end min() template -__host__ __device__ +THRUST_HOST_DEVICE T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) { return comp(lhs,rhs) ? rhs : lhs; } // end max() template -__host__ __device__ +THRUST_HOST_DEVICE T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) { return lhs < rhs ? rhs : lhs; diff --git a/thrust/detail/mismatch.inl b/thrust/detail/mismatch.inl index 16c579d80..83a5fcc9a 100644 --- a/thrust/detail/mismatch.inl +++ b/thrust/detail/mismatch.inl @@ -25,9 +25,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -38,9 +38,9 @@ thrust::pair mismatch(const thrust::detail::exec } // end mismatch() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/detail/numeric_traits.h b/thrust/detail/numeric_traits.h index e728adcaf..38e97a1e3 100644 --- a/thrust/detail/numeric_traits.h +++ b/thrust/detail/numeric_traits.h @@ -116,7 +116,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE typename numeric_difference::type numeric_distance(Number x, Number y) { diff --git a/thrust/detail/pair.inl b/thrust/detail/pair.inl index 4b7dd6eb0..99118656a 100644 --- a/thrust/detail/pair.inl +++ b/thrust/detail/pair.inl @@ -25,7 +25,7 @@ THRUST_NAMESPACE_BEGIN template - __host__ __device__ + THRUST_HOST_DEVICE pair ::pair(void) :first(),second() @@ -35,7 +35,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE pair ::pair(const T1 &x, const T2 &y) :first(x),second(y) @@ -46,7 +46,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE pair ::pair(const pair &p) 
:first(p.first),second(p.second) @@ -57,7 +57,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE pair ::pair(const std::pair &p) :first(p.first),second(p.second) @@ -67,7 +67,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void pair ::swap(thrust::pair &p) { @@ -79,7 +79,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator==(const pair &x, const pair &y) { return x.first == y.first && x.second == y.second; @@ -87,7 +87,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator<(const pair &x, const pair &y) { return x.first < y.first || (!(y.first < x.first) && x.second < y.second); @@ -95,7 +95,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator!=(const pair &x, const pair &y) { return !(x == y); @@ -103,7 +103,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator>(const pair &x, const pair &y) { return y < x; @@ -111,7 +111,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator<=(const pair &x, const pair &y) { return !(y < x); @@ -119,7 +119,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator>=(const pair &x, const pair &y) { return !(x < y); @@ -127,7 +127,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(pair &x, pair &y) { return x.swap(y); @@ -135,7 +135,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE pair make_pair(T1 x, T2 y) { return pair(x,y); @@ -174,14 +174,14 @@ template struct pair_get {}; template struct pair_get<0, Pair> { - inline __host__ __device__ + inline THRUST_HOST_DEVICE const typename tuple_element<0, Pair>::type & operator()(const Pair &p) const { return p.first; } // end operator()() - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename tuple_element<0, Pair>::type 
& operator()(Pair &p) const { @@ -193,14 +193,14 @@ template template struct pair_get<1, Pair> { - inline __host__ __device__ + inline THRUST_HOST_DEVICE const typename tuple_element<1, Pair>::type & operator()(const Pair &p) const { return p.second; } // end operator()() - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename tuple_element<1, Pair>::type & operator()(Pair &p) const { @@ -213,7 +213,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename tuple_element >::type & get(pair &p) { @@ -221,7 +221,7 @@ template } // end get() template - inline __host__ __device__ + inline THRUST_HOST_DEVICE const typename tuple_element >::type & get(const pair &p) { diff --git a/thrust/detail/partition.inl b/thrust/detail/partition.inl index 5c51bca80..8d05a9499 100644 --- a/thrust/detail/partition.inl +++ b/thrust/detail/partition.inl @@ -25,11 +25,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -40,12 +40,12 @@ __host__ __device__ } // end partition() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -57,13 +57,13 @@ __host__ __device__ } // end partition() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -77,14 +77,14 @@ __host__ __device__ } // end partition_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 first, @@ 
-99,11 +99,11 @@ __host__ __device__ } // end partition_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -114,12 +114,12 @@ __host__ __device__ } // end stable_partition() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -131,13 +131,13 @@ __host__ __device__ } // end stable_partition() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -151,14 +151,14 @@ __host__ __device__ } // end stable_partition_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 first, @@ -173,9 +173,9 @@ __host__ __device__ } // end stable_partition_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -186,9 +186,9 @@ __host__ __device__ } // end partition_point() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, diff --git a/thrust/detail/pointer.h b/thrust/detail/pointer.h index b5baf8c5b..b1846f51e 100644 --- a/thrust/detail/pointer.h +++ b/thrust/detail/pointer.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2021 NVIDIA Corporation - * 
Modifications Copyright© 2019-2021 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -139,7 +139,7 @@ template // friend iterator_core_access to give it access to dereference friend class thrust::iterator_core_access; - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::reference dereference() const; // don't provide access to this part of super_t's interface @@ -151,28 +151,28 @@ template // constructors - __host__ __device__ + THRUST_HOST_DEVICE pointer(); // NOTE: This is needed so that Thrust smart pointers can be used in // `std::unique_ptr`. - __host__ __device__ + THRUST_HOST_DEVICE pointer(std::nullptr_t); // OtherValue shall be convertible to Value // XXX consider making the pointer implementation a template parameter which defaults to Element * template - __host__ __device__ + THRUST_HOST_DEVICE explicit pointer(OtherElement *ptr); // Fixes hipcc linkage error - __host__ __device__ + THRUST_HOST_DEVICE explicit pointer(Element *ptr); // OtherPointer's element_type shall be convertible to Element // OtherPointer's system shall be convertible to Tag template - __host__ __device__ + THRUST_HOST_DEVICE pointer(const OtherPointer &other, typename thrust::detail::enable_if_pointer_is_convertible< OtherPointer, @@ -182,7 +182,7 @@ template // OtherPointer's element_type shall be void // OtherPointer's system shall be convertible to Tag template - __host__ __device__ + THRUST_HOST_DEVICE explicit pointer(const OtherPointer &other, typename thrust::detail::enable_if_void_pointer_is_system_convertible< @@ -194,13 +194,13 @@ template // NOTE: This is needed so that Thrust smart pointers can be used in // `std::unique_ptr`. 
- __host__ __device__ + THRUST_HOST_DEVICE derived_type& operator=(std::nullptr_t); // OtherPointer's element_type shall be convertible to Element // OtherPointer's system shall be convertible to Tag template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::detail::enable_if_pointer_is_convertible< OtherPointer, pointer, @@ -210,18 +210,18 @@ template // observers - __host__ __device__ + THRUST_HOST_DEVICE Element *get() const; - __host__ __device__ + THRUST_HOST_DEVICE Element *operator->() const; // NOTE: This is needed so that Thrust smart pointers can be used in // `std::unique_ptr`. - __host__ __device__ + THRUST_HOST_DEVICE explicit operator bool() const; - __host__ __device__ + THRUST_HOST_DEVICE static derived_type pointer_to(typename thrust::detail::pointer_traits_detail::pointer_to_param::type r) { return thrust::detail::pointer_traits::pointer_to(r); @@ -232,7 +232,7 @@ template // Output stream operator template -__host__ +THRUST_HOST std::basic_ostream & operator<<(std::basic_ostream &os, const pointer &p); @@ -240,19 +240,19 @@ operator<<(std::basic_ostream &os, // NOTE: This is needed so that Thrust smart pointers can be used in // `std::unique_ptr`. template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(std::nullptr_t, pointer p); template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(pointer p, std::nullptr_t); template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(std::nullptr_t, pointer p); template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(pointer p, std::nullptr_t); THRUST_NAMESPACE_END diff --git a/thrust/detail/pointer.inl b/thrust/detail/pointer.inl index 3a23bdf32..d854648c1 100644 --- a/thrust/detail/pointer.inl +++ b/thrust/detail/pointer.inl @@ -1,6 +1,6 @@ /* * Copyright 2008-2021 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ THRUST_NAMESPACE_BEGIN template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer() : super_t(static_cast(nullptr)) @@ -33,7 +33,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer(std::nullptr_t) : super_t(static_cast(nullptr)) @@ -42,7 +42,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer(OtherElement *other) : super_t(other) @@ -50,7 +50,7 @@ template // Fixes hipcc linkage error template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer(Element* ptr) : super_t(ptr) @@ -58,7 +58,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer(const OtherPointer &other, typename thrust::detail::enable_if_pointer_is_convertible< @@ -71,7 +71,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::pointer(const OtherPointer &other, typename thrust::detail::enable_if_void_pointer_is_system_convertible< @@ -83,7 +83,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename pointer::derived_type & pointer ::operator=(decltype(nullptr)) @@ -95,7 +95,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::detail::enable_if_pointer_is_convertible< OtherPointer, pointer, @@ -114,7 +114,7 @@ namespace detail // Implementation for dereference() when Reference is Element&, // e.g. 
cuda's managed_memory_pointer template -__host__ __device__ +THRUST_HOST_DEVICE Reference pointer_dereference_impl(const Derived& ptr, thrust::detail::true_type /* is_cpp_ref */) { @@ -123,7 +123,7 @@ Reference pointer_dereference_impl(const Derived& ptr, // Implementation for pointers with proxy references: template -__host__ __device__ +THRUST_HOST_DEVICE Reference pointer_dereference_impl(const Derived& ptr, thrust::detail::false_type /* is_cpp_ref */) { @@ -134,7 +134,7 @@ Reference pointer_dereference_impl(const Derived& ptr, template - __host__ __device__ + THRUST_HOST_DEVICE typename pointer::super_t::reference pointer ::dereference() const @@ -151,7 +151,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE Element *pointer ::get() const { @@ -160,7 +160,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE Element *pointer ::operator->() const { @@ -169,7 +169,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE pointer ::operator bool() const { @@ -179,7 +179,7 @@ template template -__host__ +THRUST_HOST std::basic_ostream & operator<<(std::basic_ostream &os, const pointer &p) { @@ -189,28 +189,28 @@ operator<<(std::basic_ostream &os, // NOTE: These are needed so that Thrust smart pointers work with // `std::unique_ptr`. 
template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(std::nullptr_t, pointer p) { return nullptr == p.get(); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(pointer p, std::nullptr_t) { return nullptr == p.get(); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(std::nullptr_t, pointer p) { return !(nullptr == p); } template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(pointer p, std::nullptr_t) { return !(nullptr == p); diff --git a/thrust/detail/range/head_flags.h b/thrust/detail/range/head_flags.h index b755840c9..ced43a11b 100644 --- a/thrust/detail/range/head_flags.h +++ b/thrust/detail/range/head_flags.h @@ -48,18 +48,18 @@ template - __host__ __device__ __thrust_forceinline__ + THRUST_HOST_DEVICE THRUST_FORCEINLINE result_type operator()(const Tuple &t) { const IndexType i = thrust::get<0>(t); @@ -81,36 +81,36 @@ template > > iterator; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE head_flags_with_init(RandomAccessIterator first, RandomAccessIterator last, init_type init) : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first - 1)), head_flag_functor(init, last - first))), m_end(m_begin + (last - first)) {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE head_flags_with_init(RandomAccessIterator first, RandomAccessIterator last, init_type init, BinaryPredicate binary_pred) : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first - 1)), head_flag_functor(init, last - first, binary_pred))), m_end(m_begin + (last - first)) {} - __host__ __device__ + THRUST_HOST_DEVICE iterator begin() const { return m_begin; } - __host__ __device__ + THRUST_HOST_DEVICE iterator end() const { return m_end; } template - __host__ __device__ + THRUST_HOST_DEVICE 
typename iterator::reference operator[](OtherIndex i) { return *(begin() + i); @@ -138,18 +138,18 @@ template - __host__ __device__ __thrust_forceinline__ + THRUST_HOST_DEVICE THRUST_FORCEINLINE result_type operator()(const Tuple &t) { const IndexType i = thrust::get<0>(t); @@ -168,34 +168,34 @@ template > > iterator; - __host__ __device__ + THRUST_HOST_DEVICE head_flags(RandomAccessIterator first, RandomAccessIterator last) : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first - 1)), head_flag_functor(last - first))), m_end(m_begin + (last - first)) {} - __host__ __device__ + THRUST_HOST_DEVICE head_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first - 1)), head_flag_functor(last - first, binary_pred))), m_end(m_begin + (last - first)) {} - __host__ __device__ + THRUST_HOST_DEVICE iterator begin() const { return m_begin; } - __host__ __device__ + THRUST_HOST_DEVICE iterator end() const { return m_end; } template - __host__ __device__ + THRUST_HOST_DEVICE typename iterator::reference operator[](OtherIndex i) { return *(begin() + i); @@ -207,7 +207,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE head_flags make_head_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) { @@ -216,7 +216,7 @@ head_flags template -__host__ __device__ +THRUST_HOST_DEVICE head_flags make_head_flags(RandomAccessIterator first, RandomAccessIterator last) { diff --git a/thrust/detail/range/tail_flags.h b/thrust/detail/range/tail_flags.h index 41ee5dd29..8cbc1d62b 100644 --- a/thrust/detail/range/tail_flags.h +++ b/thrust/detail/range/tail_flags.h @@ -45,17 +45,17 @@ template iterator; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE 
tail_flags(RandomAccessIterator first, RandomAccessIterator last) : m_begin(thrust::make_transform_iterator(thrust::counting_iterator(0), tail_flag_functor(first, last))), m_end(m_begin + (last - first)) {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) : m_begin(thrust::make_transform_iterator(thrust::counting_iterator(0), tail_flag_functor(first, last, binary_pred))), m_end(m_begin + (last - first)) {} - __host__ __device__ + THRUST_HOST_DEVICE iterator begin() const { return m_begin; } - __host__ __device__ + THRUST_HOST_DEVICE iterator end() const { return m_end; } template - __host__ __device__ + THRUST_HOST_DEVICE typename iterator::reference operator[](OtherIndex i) { return *(begin() + i); @@ -111,7 +111,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE tail_flags make_tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) { @@ -120,7 +120,7 @@ tail_flags template -__host__ __device__ +THRUST_HOST_DEVICE tail_flags make_tail_flags(RandomAccessIterator first, RandomAccessIterator last) { diff --git a/thrust/detail/raw_pointer_cast.h b/thrust/detail/raw_pointer_cast.h index 53a77861e..3441ae2ef 100644 --- a/thrust/detail/raw_pointer_cast.h +++ b/thrust/detail/raw_pointer_cast.h @@ -22,7 +22,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::pointer_traits::raw_pointer raw_pointer_cast(Pointer ptr) { @@ -30,7 +30,7 @@ raw_pointer_cast(Pointer ptr) } template -__host__ __device__ +THRUST_HOST_DEVICE ToPointer reinterpret_pointer_cast(FromPointer ptr) { @@ -39,7 +39,7 @@ reinterpret_pointer_cast(FromPointer ptr) } template -__host__ __device__ +THRUST_HOST_DEVICE ToPointer static_pointer_cast(FromPointer ptr) { diff --git a/thrust/detail/raw_reference_cast.h b/thrust/detail/raw_reference_cast.h index 
99af0da18..9d75db581 100644 --- a/thrust/detail/raw_reference_cast.h +++ b/thrust/detail/raw_reference_cast.h @@ -227,13 +227,13 @@ template < // provide declarations of raw_reference_cast's overloads for raw_reference_caster below template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(T &ref); template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(const T &ref); @@ -241,7 +241,7 @@ typename detail::raw_reference::type template< typename... Ts > -__host__ __device__ +THRUST_HOST_DEVICE typename detail::enable_if_unwrappable< thrust::detail::tuple_of_iterator_references, typename detail::raw_reference< @@ -258,14 +258,14 @@ namespace detail struct raw_reference_caster { template - __host__ __device__ + THRUST_HOST_DEVICE typename detail::raw_reference::type operator()(T &ref) { return thrust::raw_reference_cast(ref); } template - __host__ __device__ + THRUST_HOST_DEVICE typename detail::raw_reference::type operator()(const T &ref) { return thrust::raw_reference_cast(ref); @@ -274,7 +274,7 @@ struct raw_reference_caster template< typename... Ts > - __host__ __device__ + THRUST_HOST_DEVICE typename detail::raw_reference< thrust::detail::tuple_of_iterator_references >::type @@ -292,7 +292,7 @@ struct raw_reference_caster template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(T &ref) { @@ -301,7 +301,7 @@ typename detail::raw_reference::type template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(const T &ref) { @@ -312,7 +312,7 @@ typename detail::raw_reference::type template< typename... 
Ts > -__host__ __device__ +THRUST_HOST_DEVICE typename detail::enable_if_unwrappable< thrust::detail::tuple_of_iterator_references, typename detail::raw_reference< diff --git a/thrust/detail/reduce.inl b/thrust/detail/reduce.inl index 448a4b38c..895428ee3 100644 --- a/thrust/detail/reduce.inl +++ b/thrust/detail/reduce.inl @@ -29,9 +29,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::value_type reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last) { @@ -40,9 +40,9 @@ __host__ __device__ } // end reduce() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -53,12 +53,12 @@ __host__ __device__ } // end reduce() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -70,13 +70,13 @@ __host__ __device__ } // end reduce() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -90,14 +90,14 @@ __host__ __device__ } // end reduce_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -112,7 +112,7 @@ __host__ __device__ } // end reduce_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, diff --git 
a/thrust/detail/reference.h b/thrust/detail/reference.h index 5cc13625d..1dc9e8b40 100644 --- a/thrust/detail/reference.h +++ b/thrust/detail/reference.h @@ -70,7 +70,7 @@ class reference * \param other A \p reference to copy from. */ template - __host__ __device__ + THRUST_HOST_DEVICE reference( reference const& other /*! \cond @@ -93,7 +93,7 @@ class reference * * \param ptr A \p pointer to construct from. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit reference(pointer const& p) : ptr(p) {} /*! Assign the object referred to \p other to the object referred to by @@ -103,7 +103,7 @@ class reference * * \return *this. */ - __host__ __device__ + THRUST_HOST_DEVICE derived_type& operator=(reference const& other) { assign_from(&other); @@ -122,7 +122,7 @@ class reference * \return *this. */ template - __host__ __device__ + THRUST_HOST_DEVICE /*! \cond */ typename std::enable_if< @@ -150,7 +150,7 @@ class reference * * \return *this. */ - __host__ __device__ + THRUST_HOST_DEVICE derived_type& operator=(value_type const& rhs) { assign_from(&rhs); @@ -162,7 +162,7 @@ class reference * * \param other The \p tagged_reference to swap with. */ - __host__ __device__ + THRUST_HOST_DEVICE void swap(derived_type& other) { // Avoid default-constructing a system; instead, just use a null pointer @@ -172,11 +172,11 @@ class reference swap(system, other); } - __host__ __device__ pointer operator&() const { return ptr; } + THRUST_HOST_DEVICE pointer operator&() const { return ptr; } // This is inherently hazardous, as it discards the strong type information // about what system the object is on. - __host__ __device__ operator value_type() const + THRUST_HOST_DEVICE operator value_type() const { // Avoid default-constructing a system; instead, just use a null pointer // for dispatch. 
This assumes that `get_value` will not access any system @@ -185,7 +185,7 @@ class reference return convert_to_value_type(system); } - __host__ __device__ + THRUST_HOST_DEVICE derived_type& operator++() { // Sadly, this has to make a copy. The only mechanism we have for @@ -197,7 +197,7 @@ class reference return derived(); } - __host__ __device__ + THRUST_HOST_DEVICE value_type operator++(int) { value_type tmp = *this; @@ -225,7 +225,7 @@ class reference return derived(); } - __host__ __device__ + THRUST_HOST_DEVICE derived_type& operator+=(value_type const& rhs) { value_type tmp = *this; @@ -319,11 +319,11 @@ class reference template friend class reference; - __host__ __device__ + THRUST_HOST_DEVICE derived_type& derived() { return static_cast(*this); } template - __host__ __device__ + THRUST_HOST_DEVICE value_type convert_to_value_type(System* system) const { using thrust::system::detail::generic::select_system; @@ -331,7 +331,7 @@ class reference } template - __host__ __device__ + THRUST_HOST_DEVICE value_type strip_const_get_value(System const& system) const { System &non_const_system = const_cast(system); @@ -341,7 +341,7 @@ class reference } template - __host__ __device__ + THRUST_HOST_DEVICE void assign_from(System0* system0, System1* system1, OtherPointer src) { using thrust::system::detail::generic::select_system; @@ -349,7 +349,7 @@ class reference } template - __host__ __device__ + THRUST_HOST_DEVICE void assign_from(OtherPointer src) { // Avoid default-constructing systems; instead, just use a null pointer @@ -361,7 +361,7 @@ class reference } template - __host__ __device__ + THRUST_HOST_DEVICE void strip_const_assign_value(System const& system, OtherPointer src) { System& non_const_system = const_cast(system); @@ -371,7 +371,7 @@ class reference } template - __host__ __device__ + THRUST_HOST_DEVICE void swap(System* system, derived_type& other) { using thrust::system::detail::generic::select_system; @@ -437,7 +437,7 @@ class tagged_reference * \param 
other A \p tagged_reference to copy from. */ template - __host__ __device__ + THRUST_HOST_DEVICE tagged_reference(tagged_reference const& other) : base_type(other) {} @@ -448,7 +448,7 @@ class tagged_reference * * \param ptr A \p pointer to construct from. */ - __host__ __device__ explicit tagged_reference(pointer const& p) + THRUST_HOST_DEVICE explicit tagged_reference(pointer const& p) : base_type(p) {} @@ -459,7 +459,7 @@ class tagged_reference * * \return *this. */ - __host__ __device__ + THRUST_HOST_DEVICE tagged_reference& operator=(tagged_reference const& other) { return base_type::operator=(other); @@ -476,7 +476,7 @@ class tagged_reference * \return *this. */ template - __host__ __device__ + THRUST_HOST_DEVICE tagged_reference& operator=(tagged_reference const& other) { @@ -489,7 +489,7 @@ class tagged_reference * * \return *this. */ - __host__ __device__ + THRUST_HOST_DEVICE tagged_reference& operator=(value_type const& rhs) { return base_type::operator=(rhs); @@ -508,7 +508,7 @@ class tagged_reference {}; * \param y The second \p tagged_reference of interest. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE void swap(tagged_reference& x, tagged_reference& y) { x.swap(y); diff --git a/thrust/detail/remove.inl b/thrust/detail/remove.inl index 7ccc0cc46..42f939642 100644 --- a/thrust/detail/remove.inl +++ b/thrust/detail/remove.inl @@ -26,11 +26,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -41,12 +41,12 @@ __host__ __device__ } // end remove() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -58,11 +58,11 @@ __host__ __device__ } // end remove_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -73,12 +73,12 @@ __host__ __device__ } // end remove_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -90,12 +90,12 @@ __host__ __device__ } // end remove_copy_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -107,13 +107,13 @@ __host__ __device__ } // end remove_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, 
InputIterator1 last, diff --git a/thrust/detail/replace.inl b/thrust/detail/replace.inl index 629287bee..dd13f02f1 100644 --- a/thrust/detail/replace.inl +++ b/thrust/detail/replace.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void replace(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, const T &old_value, @@ -39,9 +39,9 @@ __host__ __device__ } // end replace() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, Predicate pred, @@ -52,9 +52,9 @@ __host__ __device__ } // end replace_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, InputIterator stencil, @@ -66,9 +66,9 @@ __host__ __device__ } // end replace_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -80,9 +80,9 @@ __host__ __device__ } // end replace_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -94,9 +94,9 @@ __host__ __device__ } // end replace_copy_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, InputIterator2 
stencil, diff --git a/thrust/detail/reverse.inl b/thrust/detail/reverse.inl index dc316d18f..0ce56c139 100644 --- a/thrust/detail/reverse.inl +++ b/thrust/detail/reverse.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void reverse(const thrust::detail::execution_policy_base &exec, BidirectionalIterator first, BidirectionalIterator last) @@ -38,9 +38,9 @@ __host__ __device__ } // end reverse() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, BidirectionalIterator first, BidirectionalIterator last, diff --git a/thrust/detail/scan.inl b/thrust/detail/scan.inl index b781b0e28..4a31d9dcb 100644 --- a/thrust/detail/scan.inl +++ b/thrust/detail/scan.inl @@ -28,11 +28,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -43,12 +43,12 @@ __host__ __device__ } // end inclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -60,11 +60,11 @@ __host__ __device__ } // end inclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -75,12 +75,12 @@ __host__ __device__ } // end exclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const 
thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -92,13 +92,13 @@ __host__ __device__ } // end exclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -111,12 +111,12 @@ __host__ __device__ } // end exclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -128,13 +128,13 @@ __host__ __device__ } // end inclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -147,14 +147,14 @@ __host__ __device__ } // end inclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -168,12 +168,12 @@ __host__ __device__ } // end inclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -185,13 +185,13 @@ __host__ __device__ } // end exclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -204,14 +204,14 @@ __host__ __device__ } 
// end exclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -225,7 +225,7 @@ __host__ __device__ } // end exclusive_scan_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/detail/scatter.inl b/thrust/detail/scatter.inl index 30dd611d1..d229f95bb 100644 --- a/thrust/detail/scatter.inl +++ b/thrust/detail/scatter.inl @@ -27,12 +27,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -44,13 +44,13 @@ __host__ __device__ } // end scatter() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -63,14 +63,14 @@ __host__ __device__ } // end scatter_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/detail/select_system.h b/thrust/detail/select_system.h index 6bfc719b0..ca94cc0a7 100644 --- a/thrust/detail/select_system.h +++ b/thrust/detail/select_system.h @@ -45,9 +45,9 @@ using thrust::system::detail::generic::select_system; struct select_system_fn final { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE auto operator()( 
thrust::detail::execution_policy_base const& exec0 ) const @@ -57,9 +57,9 @@ struct select_system_fn final ) ) - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE auto operator()( thrust::detail::execution_policy_base const& exec0 , thrust::detail::execution_policy_base const& exec1 diff --git a/thrust/detail/seq.h b/thrust/detail/seq.h index 54f052d36..28279fbf9 100644 --- a/thrust/detail/seq.h +++ b/thrust/detail/seq.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,12 +30,12 @@ struct seq_t : thrust::system::detail::sequential::execution_policy, thrust::detail::allocator_aware_execution_policy< thrust::system::detail::sequential::execution_policy> { - __host__ __device__ + THRUST_HOST_DEVICE constexpr seq_t() : thrust::system::detail::sequential::execution_policy() {} // allow any execution_policy to convert to seq_t template - __host__ __device__ + THRUST_HOST_DEVICE seq_t(const thrust::execution_policy &) : thrust::system::detail::sequential::execution_policy() {} diff --git a/thrust/detail/sequence.inl b/thrust/detail/sequence.inl index ffc9b968b..2e9c9ccae 100644 --- a/thrust/detail/sequence.inl +++ b/thrust/detail/sequence.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) @@ -38,9 +38,9 @@ __host__ __device__ } // end sequence() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const 
thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -51,9 +51,9 @@ __host__ __device__ } // end sequence() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/detail/set_operations.inl b/thrust/detail/set_operations.inl index 7915f7b3e..66ca51c43 100644 --- a/thrust/detail/set_operations.inl +++ b/thrust/detail/set_operations.inl @@ -25,12 +25,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -43,13 +43,13 @@ OutputIterator set_difference(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -63,7 +63,7 @@ OutputIterator set_difference(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -88,7 +88,7 @@ thrust::pair } // end set_difference_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -115,12 +115,12 @@ thrust::pair } // end set_difference_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -133,13 +133,13 @@ OutputIterator set_intersection(const 
thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -153,14 +153,14 @@ OutputIterator set_intersection(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -176,7 +176,7 @@ thrust::pair } // end set_intersection_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -201,12 +201,12 @@ thrust::pair } // end set_intersection_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -219,13 +219,13 @@ OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_b } // end set_symmetric_difference() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -239,7 +239,7 @@ OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_b } // end set_symmetric_difference() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -264,7 +264,7 @@ thrust::pair } // end set_symmetric_difference_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ 
+THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -291,12 +291,12 @@ thrust::pair } // end set_symmetric_difference_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -309,13 +309,13 @@ OutputIterator set_union(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -329,7 +329,7 @@ OutputIterator set_union(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -354,7 +354,7 @@ thrust::pair } // end set_union_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, diff --git a/thrust/detail/shuffle.inl b/thrust/detail/shuffle.inl index 48f5ba639..9a0c3004b 100644 --- a/thrust/detail/shuffle.inl +++ b/thrust/detail/shuffle.inl @@ -28,9 +28,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ void shuffle( +THRUST_HOST_DEVICE void shuffle( const thrust::detail::execution_policy_base& exec, RandomIterator first, RandomIterator last, URBG&& g) { using thrust::system::detail::generic::shuffle; @@ -40,7 +40,7 @@ __host__ __device__ void shuffle( } template -__host__ __device__ void shuffle(RandomIterator first, RandomIterator last, +THRUST_HOST_DEVICE void shuffle(RandomIterator first, RandomIterator last, URBG&& g) { using thrust::system::detail::generic::select_system; 
@@ -50,10 +50,10 @@ __host__ __device__ void shuffle(RandomIterator first, RandomIterator last, return thrust::shuffle(select_system(system), first, last, g); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ void shuffle_copy( +THRUST_HOST_DEVICE void shuffle_copy( const thrust::detail::execution_policy_base& exec, RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g) { @@ -64,7 +64,7 @@ __host__ __device__ void shuffle_copy( } template -__host__ __device__ void shuffle_copy(RandomIterator first, RandomIterator last, +THRUST_HOST_DEVICE void shuffle_copy(RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g) { using thrust::system::detail::generic::select_system; diff --git a/thrust/detail/sort.inl b/thrust/detail/sort.inl index 53f8bad93..dc17a3d23 100644 --- a/thrust/detail/sort.inl +++ b/thrust/detail/sort.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -38,11 +38,11 @@ __host__ __device__ } // end sort() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -53,9 +53,9 @@ __host__ __device__ } // end sort() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -65,11 +65,11 @@ __host__ __device__ } // end stable_sort() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator 
first, RandomAccessIterator last, @@ -80,11 +80,11 @@ __host__ __device__ } // end stable_sort() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -95,12 +95,12 @@ __host__ __device__ } // end sort_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -112,11 +112,11 @@ __host__ __device__ } // end sort_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -127,12 +127,12 @@ __host__ __device__ } // end stable_sort_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -144,9 +144,9 @@ __host__ __device__ } // end stable_sort_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) @@ -156,9 +156,9 @@ __host__ __device__ } // end is_sorted() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -169,9 +169,9 @@ __host__ __device__ } // end is_sorted() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ 
__device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) @@ -181,9 +181,9 @@ __host__ __device__ } // end is_sorted_until() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -214,7 +214,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void sort(RandomAccessIterator first, RandomAccessIterator last, StrictWeakOrdering comp) diff --git a/thrust/detail/static_map.h b/thrust/detail/static_map.h index 9f0d79e83..48befccae 100644 --- a/thrust/detail/static_map.h +++ b/thrust/detail/static_map.h @@ -46,7 +46,7 @@ struct cons template - __host__ __device__ + THRUST_HOST_DEVICE static unsigned int get(unsigned int key) { return (key == Head::key) ? (Head::value) : Tail::template get(key); @@ -64,7 +64,7 @@ struct cons }; template - __host__ __device__ + THRUST_HOST_DEVICE static unsigned int get(unsigned int key) { return (key == Head::key) ? 
(Head::value) : default_value; @@ -114,7 +114,7 @@ struct static_map static const unsigned int value = impl::template static_get::value; }; - __host__ __device__ + THRUST_HOST_DEVICE static unsigned int get(unsigned int key) { return impl::template get(key); @@ -157,7 +157,7 @@ struct static_lookup template -__host__ __device__ +THRUST_HOST_DEVICE unsigned int lookup(unsigned int key) { return StaticMap::get(key); diff --git a/thrust/detail/swap.h b/thrust/detail/swap.h index 305750f8a..04dd8b615 100644 --- a/thrust/detail/swap.h +++ b/thrust/detail/swap.h @@ -21,9 +21,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline void swap(Assignable1 &a, Assignable2 &b) { Assignable1 temp = a; diff --git a/thrust/detail/swap_ranges.inl b/thrust/detail/swap_ranges.inl index 1f35c1ff3..57570860d 100644 --- a/thrust/detail/swap_ranges.inl +++ b/thrust/detail/swap_ranges.inl @@ -27,11 +27,11 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, ForwardIterator1 first1, ForwardIterator1 last1, diff --git a/thrust/detail/tabulate.inl b/thrust/detail/tabulate.inl index 308be061f..de1aa50b5 100644 --- a/thrust/detail/tabulate.inl +++ b/thrust/detail/tabulate.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void tabulate(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/detail/temporary_array.h b/thrust/detail/temporary_array.h index c4d767c05..38648c77a 100644 --- a/thrust/detail/temporary_array.h +++ b/thrust/detail/temporary_array.h @@ -69,43 +69,43 @@ template public: typedef typename super_t::size_type size_type; - __host__ __device__ + THRUST_HOST_DEVICE 
temporary_array(thrust::execution_policy &system); - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(thrust::execution_policy &system, size_type n); // provide a kill-switch to explicitly avoid initialization - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(int uninit, thrust::execution_policy &system, size_type n); template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(thrust::execution_policy &system, InputIterator first, size_type n); template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(thrust::execution_policy &system, thrust::execution_policy &input_system, InputIterator first, size_type n); template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(thrust::execution_policy &system, InputIterator first, InputIterator last); template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array(thrust::execution_policy &system, thrust::execution_policy &input_system, InputIterator first, InputIterator last); - __host__ __device__ + THRUST_HOST_DEVICE ~temporary_array(); }; // end temporary_array diff --git a/thrust/detail/temporary_array.inl b/thrust/detail/temporary_array.inl index 90b7279ac..761864a8b 100644 --- a/thrust/detail/temporary_array.inl +++ b/thrust/detail/temporary_array.inl @@ -35,7 +35,7 @@ template struct avoid_initialization : thrust::detail::has_trivial_c template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if< avoid_initialization::value >::type @@ -47,7 +47,7 @@ typename thrust::detail::enable_if< template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::disable_if< avoid_initialization::value >::type @@ -62,7 +62,7 @@ typename thrust::detail::disable_if< template -__host__ __device__ +THRUST_HOST_DEVICE temporary_array ::temporary_array(thrust::execution_policy &system) :super_t(alloc_type(temporary_allocator(system))) @@ -71,7 +71,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE temporary_array 
::temporary_array(thrust::execution_policy &system, size_type n) :super_t(n, alloc_type(temporary_allocator(system))) @@ -81,7 +81,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE temporary_array ::temporary_array(int, thrust::execution_policy &system, size_type n) :super_t(n, alloc_type(temporary_allocator(system))) @@ -93,7 +93,7 @@ __host__ __device__ template template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array ::temporary_array(thrust::execution_policy &system, InputIterator first, @@ -108,7 +108,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array ::temporary_array(thrust::execution_policy &system, thrust::execution_policy &input_system, @@ -124,7 +124,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array ::temporary_array(thrust::execution_policy &system, InputIterator first, @@ -139,7 +139,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE temporary_array ::temporary_array(thrust::execution_policy &system, thrust::execution_policy &input_system, @@ -154,7 +154,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE temporary_array ::~temporary_array() { diff --git a/thrust/detail/temporary_buffer.h b/thrust/detail/temporary_buffer.h index 863908223..fb33c9dc8 100644 --- a/thrust/detail/temporary_buffer.h +++ b/thrust/detail/temporary_buffer.h @@ -32,7 +32,7 @@ namespace detail template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair, typename thrust::pointer::difference_type> down_cast_pair(Pair p) { @@ -47,9 +47,9 @@ __host__ __device__ } // end detail -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair, typename thrust::pointer::difference_type> get_temporary_buffer(const thrust::detail::execution_policy_base &exec, typename thrust::pointer::difference_type n) { @@ -60,9 +60,9 @@ __host__ __device__ } // end get_temporary_buffer() 
-__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(const thrust::detail::execution_policy_base &exec, Pointer p, std::ptrdiff_t n) { using thrust::detail::return_temporary_buffer; // execute_with_allocator diff --git a/thrust/detail/transform.inl b/thrust/detail/transform.inl index 62bafd35e..60482e83c 100644 --- a/thrust/detail/transform.inl +++ b/thrust/detail/transform.inl @@ -27,12 +27,12 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -43,13 +43,13 @@ __host__ __device__ } // end transform() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, @@ -61,13 +61,13 @@ __host__ __device__ } // end transform() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, ForwardIterator result, @@ -79,14 +79,14 @@ __host__ __device__ } // end transform_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, InputIterator2 stencil, @@ -99,7 +99,7 @@ __host__ __device__ } // end transform_if() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, 
InputIterator2 first2, diff --git a/thrust/detail/transform_reduce.inl b/thrust/detail/transform_reduce.inl index 702dd9f73..ab23ae28a 100644 --- a/thrust/detail/transform_reduce.inl +++ b/thrust/detail/transform_reduce.inl @@ -25,13 +25,13 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, diff --git a/thrust/detail/transform_scan.inl b/thrust/detail/transform_scan.inl index 957001cef..d33ee4443 100644 --- a/thrust/detail/transform_scan.inl +++ b/thrust/detail/transform_scan.inl @@ -27,13 +27,13 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -46,14 +46,14 @@ __host__ __device__ } // end transform_inclusive_scan() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, diff --git a/thrust/detail/trivial_sequence.h b/thrust/detail/trivial_sequence.h index 2cf98e787..cb062f312 100644 --- a/thrust/detail/trivial_sequence.h +++ b/thrust/detail/trivial_sequence.h @@ -46,15 +46,15 @@ struct _trivial_sequence typedef Iterator iterator_type; Iterator first, last; - __host__ __device__ + THRUST_HOST_DEVICE _trivial_sequence(thrust::execution_policy &, Iterator _first, Iterator _last) : first(_first), last(_last) { } - __host__ __device__ + THRUST_HOST_DEVICE iterator_type begin() { return first; } - __host__ __device__ + THRUST_HOST_DEVICE iterator_type end() { return last; } }; @@ -67,16 +67,16 @@ struct _trivial_sequence thrust::detail::temporary_array buffer; - __host__ 
__device__ + THRUST_HOST_DEVICE _trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) : buffer(exec, first, last) { } - __host__ __device__ + THRUST_HOST_DEVICE iterator_type begin() { return buffer.begin(); } - __host__ __device__ + THRUST_HOST_DEVICE iterator_type end() { return buffer.end(); } }; @@ -86,7 +86,7 @@ struct trivial_sequence { typedef _trivial_sequence::type> super_t; - __host__ __device__ + THRUST_HOST_DEVICE trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) : super_t(exec, first, last) { } }; diff --git a/thrust/detail/tuple.inl b/thrust/detail/tuple.inl index af8ec83e8..d64a31cfc 100644 --- a/thrust/detail/tuple.inl +++ b/thrust/detail/tuple.inl @@ -27,22 +27,22 @@ THRUST_NAMESPACE_BEGIN struct null_type {}; // null_type comparisons -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator==(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>=(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<=(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator!=(const null_type&, const null_type&) { return false; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<(const null_type&, const null_type&) { return false; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>(const null_type&, const null_type&) { return false; } // forward declaration for tuple @@ -175,7 +175,7 @@ template struct access_traits // forward declarations of get() template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type >::non_const_type @@ -184,7 +184,7 @@ inline typename access_traits< get(detail::cons& c); template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type 
>::const_type @@ -207,7 +207,7 @@ template< int N > struct get_class { template - __host__ __device__ + THRUST_HOST_DEVICE inline static RET get(const cons& t) { // XXX we may not need to deal with this for any compiler we care about -jph @@ -219,7 +219,7 @@ struct get_class } template - __host__ __device__ + THRUST_HOST_DEVICE inline static RET get(cons& t) { // XXX we may not need to deal with this for any compiler we care about -jph @@ -235,14 +235,14 @@ template<> struct get_class<0> { template - __host__ __device__ + THRUST_HOST_DEVICE inline static RET get(const cons& t) { return t.head; } template - __host__ __device__ + THRUST_HOST_DEVICE inline static RET get(cons& t) { return t.head; @@ -268,7 +268,7 @@ template struct IF template class non_storeable_type { - __host__ __device__ + THRUST_HOST_DEVICE non_storeable_type(); }; @@ -300,23 +300,23 @@ template stored_head_type head; tail_type tail; - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits::non_const_type get_head() { return head; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits::non_const_type get_tail() { return tail; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits::const_type get_head() const { return head; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits::const_type get_tail() const { return tail; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons(void) : head(), tail() {} // cons() : head(detail::default_arg::f()), tail() {} @@ -325,14 +325,14 @@ template // cannot be supported properly in any case (no assignment, // copy works only if the tails are exactly the same type, ...) 
- inline __host__ __device__ + inline THRUST_HOST_DEVICE cons(typename access_traits::parameter_type h, const tail_type& t) : head (h), tail(t) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons( T1& t1, T2& t2, T3& t3, T4& t4, T5& t5, T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) : head (t1), @@ -341,7 +341,7 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons( const null_type& /*t1*/, T2& t2, T3& t3, T4& t4, T5& t5, T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) : head (), @@ -350,31 +350,31 @@ template template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons( const cons& u ) : head(u.head), tail(u.tail) {} #if THRUST_CPP_DIALECT >= 2011 cons(const cons &) = default; #endif - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons& operator=( const cons& u ) { head=u.head; tail=u.tail; return *this; } // must define assignment operator explicitly, implicit version is // illformed if HT is a reference (12.8. 
(12)) - __thrust_exec_check_disable__ - inline __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + inline THRUST_HOST_DEVICE cons& operator=(const cons& u) { head = u.head; tail = u.tail; return *this; } // XXX enable when we support std::pair -jph //template - //__host__ __device__ + //THRUST_HOST_DEVICE //cons& operator=( const std::pair& u ) { // //BOOST_STATIC_ASSERT(length::value == 2); // check length = 2 // head = u.first; tail.head = u.second; return *this; @@ -382,7 +382,7 @@ template // get member functions (non-const and const) template - __host__ __device__ + THRUST_HOST_DEVICE typename access_traits< typename tuple_element >::type >::non_const_type @@ -391,7 +391,7 @@ template } template - __host__ __device__ + THRUST_HOST_DEVICE typename access_traits< typename tuple_element >::type >::const_type @@ -399,7 +399,7 @@ template return thrust::get(*this); // delegate to non-member get } - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(cons &c) { using thrust::swap; @@ -421,35 +421,35 @@ template stored_head_type head; typename access_traits::non_const_type - inline __host__ __device__ + inline THRUST_HOST_DEVICE get_head() { return head; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE null_type get_tail() { return null_type(); } - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits::const_type get_head() const { return head; } - inline __host__ __device__ + inline THRUST_HOST_DEVICE null_type get_tail() const { return null_type(); } - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons() : head() {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons(typename access_traits::parameter_type h, const null_type& = null_type()) : head (h) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons(T1& t1, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&) : head (t1) 
{} - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, const null_type&, @@ -457,16 +457,16 @@ template : head () {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons( const cons& u ) : head(u.head) {} #if THRUST_CPP_DIALECT >= 2011 cons(const cons &) = default; #endif - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons& operator=(const cons& u ) { head = u.head; @@ -475,11 +475,11 @@ template // must define assignment operator explicitly, implicit version // is illformed if HT is a reference - inline __host__ __device__ + inline THRUST_HOST_DEVICE cons& operator=(const cons& u) { head = u.head; return *this; } template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits< typename tuple_element::type >::non_const_type @@ -491,7 +491,7 @@ template } template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename access_traits< typename tuple_element::type >::const_type @@ -502,7 +502,7 @@ template return thrust::get(*this); } - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(cons &c) { using thrust::swap; @@ -635,7 +635,7 @@ struct make_tuple_mapper { template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type >::non_const_type @@ -660,7 +660,7 @@ get(detail::cons& c) // the element. 
If the element is a reference, returns the reference // as such (that is, can return a non-const reference) template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type >::const_type @@ -682,7 +682,7 @@ get(const detail::cons& c) template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0) { @@ -691,7 +691,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1) { @@ -700,7 +700,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2) { @@ -709,7 +709,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3) { @@ -718,7 +718,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4) { @@ -727,7 +727,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5) { @@ -736,7 +736,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6) { @@ -745,7 +745,7 @@ __host__ __device__ inline } // end make_tuple() template 
-__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7) { @@ -754,7 +754,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8) { @@ -763,7 +763,7 @@ __host__ __device__ inline } // end make_tuple() template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9) { @@ -773,70 +773,70 @@ __host__ __device__ inline template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0) { return tuple(t0); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1) { return tuple(t0,t1); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2) { return tuple(t0,t1,t2); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3) { return tuple(t0,t1,t2,t3); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4) { return tuple(t0,t1,t2,t3,t4); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5) { return tuple(t0,t1,t2,t3,t4,t5); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6) { return tuple(t0,t1,t2,t3,t4,t5,t6); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, 
T7 &t7) { return tuple(t0,t1,t2,t3,t4,t5,t6,t7); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8) { return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8); } template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9) { return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9); @@ -846,7 +846,7 @@ template< typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 > -__host__ __device__ inline +THRUST_HOST_DEVICE inline void swap(thrust::tuple &x, thrust::tuple &y) { @@ -859,67 +859,67 @@ namespace detail { template -__host__ __device__ +THRUST_HOST_DEVICE inline bool eq(const T1& lhs, const T2& rhs) { return lhs.get_head() == rhs.get_head() && eq(lhs.get_tail(), rhs.get_tail()); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool eq(const null_type&, const null_type&) { return true; } template -__host__ __device__ +THRUST_HOST_DEVICE inline bool neq(const T1& lhs, const T2& rhs) { return lhs.get_head() != rhs.get_head() || neq(lhs.get_tail(), rhs.get_tail()); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool neq(const null_type&, const null_type&) { return false; } template -__host__ __device__ +THRUST_HOST_DEVICE inline bool lt(const T1& lhs, const T2& rhs) { return (lhs.get_head() < rhs.get_head()) || (!(rhs.get_head() < lhs.get_head()) && lt(lhs.get_tail(), rhs.get_tail())); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool lt(const null_type&, const null_type&) { return false; } template -__host__ __device__ +THRUST_HOST_DEVICE inline bool gt(const T1& lhs, const T2& rhs) { return (lhs.get_head() > rhs.get_head()) || (!(rhs.get_head() > lhs.get_head()) && 
gt(lhs.get_tail(), rhs.get_tail())); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool gt(const null_type&, const null_type&) { return false; } template -__host__ __device__ +THRUST_HOST_DEVICE inline bool lte(const T1& lhs, const T2& rhs) { return lhs.get_head() <= rhs.get_head() && ( !(rhs.get_head() <= lhs.get_head()) || lte(lhs.get_tail(), rhs.get_tail())); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool lte(const null_type&, const null_type&) { return true; } template -__host__ __device__ +THRUST_HOST_DEVICE inline bool gte(const T1& lhs, const T2& rhs) { return lhs.get_head() >= rhs.get_head() && ( !(rhs.get_head() >= lhs.get_head()) || gte(lhs.get_tail(), rhs.get_tail())); } template<> -__host__ __device__ +THRUST_HOST_DEVICE inline bool gte(const null_type&, const null_type&) { return true; } } // end detail @@ -929,7 +929,7 @@ inline bool gte(const null_type&, const null_type&) { retur // equal ---- template -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator==(const detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph @@ -942,7 +942,7 @@ inline bool operator==(const detail::cons& lhs, const detail::cons -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator!=(const detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph @@ -954,7 +954,7 @@ inline bool operator!=(const detail::cons& lhs, const detail::cons -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator<(const detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph @@ -966,7 +966,7 @@ inline bool operator<(const detail::cons& lhs, const detail::cons template -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator>(const detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph @@ -978,7 +978,7 @@ inline bool operator>(const detail::cons& lhs, const detail::cons -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator<=(const 
detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph @@ -990,7 +990,7 @@ inline bool operator<=(const detail::cons& lhs, const detail::cons= template -__host__ __device__ +THRUST_HOST_DEVICE inline bool operator>=(const detail::cons& lhs, const detail::cons& rhs) { // XXX support this eventually -jph diff --git a/thrust/detail/tuple_transform.h b/thrust/detail/tuple_transform.h index 1011d5179..a7570997e 100644 --- a/thrust/detail/tuple_transform.h +++ b/thrust/detail/tuple_transform.h @@ -39,7 +39,7 @@ template struct tuple_transform_functor> { - static __host__ + static THRUST_HOST typename tuple_meta_transform::type do_it_on_the_host(const Tuple &t, UnaryFunction f) { @@ -48,7 +48,7 @@ template(t))...); } - static __host__ __device__ + static THRUST_HOST_DEVICE typename tuple_meta_transform::type do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) { @@ -72,7 +72,7 @@ template class UnaryMetaFunction, typename Tuple, typename UnaryFunction> typename tuple_meta_transform::type -__host__ __device__ +THRUST_HOST_DEVICE tuple_host_device_transform(const Tuple &t, UnaryFunction f) { return tuple_transform_functor::do_it_on_the_host_or_device(t,f); diff --git a/thrust/detail/type_traits.h b/thrust/detail/type_traits.h index 7751584a5..2eac468cd 100644 --- a/thrust/detail/type_traits.h +++ b/thrust/detail/type_traits.h @@ -64,11 +64,11 @@ template integral_constant& operator=(integral_constant const&) = default; - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE integral_constant(std::integral_constant) noexcept {} - constexpr __host__ __device__ operator value_type() const noexcept { return value; } - constexpr __host__ __device__ value_type operator()() const noexcept { return value; } + constexpr THRUST_HOST_DEVICE operator value_type() const noexcept { return value; } + constexpr THRUST_HOST_DEVICE value_type operator()() const noexcept { return value; } }; /// typedef for true_type diff --git 
a/thrust/detail/type_traits/pointer_traits.h b/thrust/detail/type_traits/pointer_traits.h index 90a8bc29d..7eb14e8bc 100644 --- a/thrust/detail/type_traits/pointer_traits.h +++ b/thrust/detail/type_traits/pointer_traits.h @@ -168,12 +168,12 @@ template struct capture_address { template - __host__ __device__ + THRUST_HOST_DEVICE capture_address(T &r) : m_addr(&r) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE Void *operator&() const { return m_addr; @@ -208,7 +208,7 @@ template typedef typename rebind_pointer::type other; }; - __host__ __device__ + THRUST_HOST_DEVICE inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) { // XXX this is supposed to be pointer::pointer_to(&r); (i.e., call a static member function of pointer called pointer_to) @@ -220,7 +220,7 @@ template // thrust additions follow typedef typename pointer_raw_pointer::type raw_pointer; - __host__ __device__ + THRUST_HOST_DEVICE inline static raw_pointer get(pointer ptr) { return ptr.get(); @@ -241,7 +241,7 @@ template typedef U* other; }; - __host__ __device__ + THRUST_HOST_DEVICE inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) { return &r; @@ -250,7 +250,7 @@ template // thrust additions follow typedef typename pointer_raw_pointer::type raw_pointer; - __host__ __device__ + THRUST_HOST_DEVICE inline static raw_pointer get(pointer ptr) { return ptr; @@ -271,7 +271,7 @@ template<> typedef U* other; }; - __host__ __device__ + THRUST_HOST_DEVICE inline static pointer pointer_to(pointer_traits_detail::pointer_to_param::type r) { return &r; @@ -280,7 +280,7 @@ template<> // thrust additions follow typedef pointer_raw_pointer::type raw_pointer; - __host__ __device__ + THRUST_HOST_DEVICE inline static raw_pointer get(pointer ptr) { return ptr; @@ -301,7 +301,7 @@ template<> typedef U* other; }; - __host__ __device__ + THRUST_HOST_DEVICE inline static pointer pointer_to(pointer_traits_detail::pointer_to_param::type r) 
{ return &r; @@ -310,7 +310,7 @@ template<> // thrust additions follow typedef pointer_raw_pointer::type raw_pointer; - __host__ __device__ + THRUST_HOST_DEVICE inline static raw_pointer get(pointer ptr) { return ptr; diff --git a/thrust/detail/uninitialized_copy.inl b/thrust/detail/uninitialized_copy.inl index 2778693ad..7b6bdba3d 100644 --- a/thrust/detail/uninitialized_copy.inl +++ b/thrust/detail/uninitialized_copy.inl @@ -27,9 +27,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -40,9 +40,9 @@ __host__ __device__ } // end uninitialized_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, diff --git a/thrust/detail/uninitialized_fill.inl b/thrust/detail/uninitialized_fill.inl index e013dac7b..686cf31ec 100644 --- a/thrust/detail/uninitialized_fill.inl +++ b/thrust/detail/uninitialized_fill.inl @@ -27,9 +27,9 @@ THRUST_NAMESPACE_BEGIN -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_fill(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -40,9 +40,9 @@ __host__ __device__ } // end uninitialized_fill() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, ForwardIterator first, Size n, diff --git a/thrust/detail/unique.inl b/thrust/detail/unique.inl index ac5475f02..c8b79c516 100644 --- a/thrust/detail/unique.inl +++ b/thrust/detail/unique.inl @@ -28,10 +28,10 @@ THRUST_NAMESPACE_BEGIN 
-__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) @@ -41,11 +41,11 @@ ForwardIterator unique(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -56,11 +56,11 @@ ForwardIterator unique(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -71,12 +71,12 @@ OutputIterator unique_copy(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -88,11 +88,11 @@ OutputIterator unique_copy(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(const thrust::detail::execution_policy_base &exec, ForwardIterator1 keys_first, @@ -104,12 +104,12 @@ __host__ __device__ } // end unique_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(const thrust::detail::execution_policy_base &exec, ForwardIterator1 keys_first, @@ -122,13 +122,13 @@ __host__ __device__ } // end unique_by_key() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -142,14 +142,14 @@ __host__ __device__ } // end unique_by_key_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(const 
thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -327,11 +327,11 @@ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -342,10 +342,10 @@ __host__ __device__ return unique_count(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, binary_pred); } // end unique_count() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -355,10 +355,10 @@ __host__ __device__ return unique_count(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); } // end unique_count() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(ForwardIterator first, ForwardIterator last, @@ -373,9 +373,9 @@ __host__ __device__ return thrust::unique_count(select_system(system), first, last, binary_pred); } // end unique_count() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(ForwardIterator first, ForwardIterator last) diff --git a/thrust/detail/util/align.h b/thrust/detail/util/align.h index a3aa75bfe..fa28d0154 100644 --- a/thrust/detail/util/align.h +++ b/thrust/detail/util/align.h @@ -31,7 +31,7 @@ namespace util template -__host__ __device__ +THRUST_HOST_DEVICE T *align_up(T * ptr, detail::uintptr_t bytes) { return (T *) ( bytes * (((detail::uintptr_t) ptr + (bytes - 1)) / bytes) ); @@ -39,7 +39,7 @@ T *align_up(T * ptr, detail::uintptr_t bytes) template -__host__ __device__ +THRUST_HOST_DEVICE T *align_down(T * ptr, detail::uintptr_t bytes) { return (T *) ( 
bytes * (detail::uintptr_t(ptr) / bytes) ); @@ -47,7 +47,7 @@ T *align_down(T * ptr, detail::uintptr_t bytes) template -__host__ __device__ +THRUST_HOST_DEVICE bool is_aligned(T * ptr, detail::uintptr_t bytes = sizeof(T)) { return detail::uintptr_t(ptr) % bytes == 0; diff --git a/thrust/detail/vector_base.h b/thrust/detail/vector_base.h index f8962b6be..4e1cffea9 100644 --- a/thrust/detail/vector_base.h +++ b/thrust/detail/vector_base.h @@ -222,13 +222,13 @@ template /*! Returns the number of elements in this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE size_type size(void) const; /*! Returns the size() of the largest possible vector_base. * \return The largest possible return value of size(). */ - __host__ __device__ + THRUST_HOST_DEVICE size_type max_size(void) const; /*! \brief If n is less than or equal to capacity(), this call has no effect. @@ -242,7 +242,7 @@ template /*! Returns the number of elements which have been reserved in this * vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE size_type capacity(void) const; /*! This method shrinks the capacity of this vector_base to exactly @@ -258,7 +258,7 @@ template * Note that data access with this operator is unchecked and * out_of_range lookups are not defined. */ - __host__ __device__ + THRUST_HOST_DEVICE reference operator[](size_type n); /*! \brief Subscript read access to the data contained in this vector_dev. @@ -269,28 +269,28 @@ template * Note that data access with this operator is unchecked and * out_of_range lookups are not defined. */ - __host__ __device__ + THRUST_HOST_DEVICE const_reference operator[](size_type n) const; /*! This method returns an iterator pointing to the beginning of * this vector_base. * \return mStart */ - __host__ __device__ + THRUST_HOST_DEVICE iterator begin(void); /*! This method returns a const_iterator pointing to the beginning * of this vector_base. 
* \return mStart */ - __host__ __device__ + THRUST_HOST_DEVICE const_iterator begin(void) const; /*! This method returns a const_iterator pointing to the beginning * of this vector_base. * \return mStart */ - __host__ __device__ + THRUST_HOST_DEVICE const_iterator cbegin(void) const; /*! This method returns a reverse_iterator pointing to the beginning of @@ -298,7 +298,7 @@ template * \return A reverse_iterator pointing to the beginning of this * vector_base's reversed sequence. */ - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator rbegin(void); /*! This method returns a const_reverse_iterator pointing to the beginning of @@ -306,7 +306,7 @@ template * \return A const_reverse_iterator pointing to the beginning of this * vector_base's reversed sequence. */ - __host__ __device__ + THRUST_HOST_DEVICE const_reverse_iterator rbegin(void) const; /*! This method returns a const_reverse_iterator pointing to the beginning of @@ -314,89 +314,89 @@ template * \return A const_reverse_iterator pointing to the beginning of this * vector_base's reversed sequence. */ - __host__ __device__ + THRUST_HOST_DEVICE const_reverse_iterator crbegin(void) const; /*! This method returns an iterator pointing to one element past the * last of this vector_base. * \return begin() + size(). */ - __host__ __device__ + THRUST_HOST_DEVICE iterator end(void); /*! This method returns a const_iterator pointing to one element past the * last of this vector_base. * \return begin() + size(). */ - __host__ __device__ + THRUST_HOST_DEVICE const_iterator end(void) const; /*! This method returns a const_iterator pointing to one element past the * last of this vector_base. * \return begin() + size(). */ - __host__ __device__ + THRUST_HOST_DEVICE const_iterator cend(void) const; /*! This method returns a reverse_iterator pointing to one element past the * last of this vector_base's reversed sequence. * \return rbegin() + size(). 
*/ - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator rend(void); /*! This method returns a const_reverse_iterator pointing to one element past the * last of this vector_base's reversed sequence. * \return rbegin() + size(). */ - __host__ __device__ + THRUST_HOST_DEVICE const_reverse_iterator rend(void) const; /*! This method returns a const_reverse_iterator pointing to one element past the * last of this vector_base's reversed sequence. * \return rbegin() + size(). */ - __host__ __device__ + THRUST_HOST_DEVICE const_reverse_iterator crend(void) const; /*! This method returns a const_reference referring to the first element of this * vector_base. * \return The first element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE const_reference front(void) const; /*! This method returns a reference pointing to the first element of this * vector_base. * \return The first element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE reference front(void); /*! This method returns a const reference pointing to the last element of * this vector_base. * \return The last element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE const_reference back(void) const; /*! This method returns a reference referring to the last element of * this vector_dev. * \return The last element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE reference back(void); /*! This method returns a pointer to this vector_base's first element. * \return A pointer to the first element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE pointer data(void); /*! This method returns a const_pointer to this vector_base's first element. * \return a const_pointer to the first element of this vector_base. */ - __host__ __device__ + THRUST_HOST_DEVICE const_pointer data(void) const; /*! This method resizes this vector_base to 0. @@ -406,7 +406,7 @@ template /*! This method returns true iff size() == 0. 
* \return true if size() == 0; false, otherwise. */ - __host__ __device__ + THRUST_HOST_DEVICE bool empty(void) const; /*! This method appends the given element to the end of this vector_base. diff --git a/thrust/detail/vector_base.inl b/thrust/detail/vector_base.inl index 9ed4c3938..c75e6fd95 100644 --- a/thrust/detail/vector_base.inl +++ b/thrust/detail/vector_base.inl @@ -367,7 +367,7 @@ template } // end vector_base::resize() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::size_type vector_base ::size(void) const @@ -376,7 +376,7 @@ template } // end vector_base::size() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::size_type vector_base ::max_size(void) const @@ -426,7 +426,7 @@ template } // end vector_base::reserve() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::size_type vector_base ::capacity(void) const @@ -443,7 +443,7 @@ template } // end vector_base::shrink_to_fit() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::reference vector_base ::operator[](const size_type n) @@ -452,7 +452,7 @@ template } // end vector_base::operator[] template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reference vector_base ::operator[](const size_type n) const @@ -461,7 +461,7 @@ template } // end vector_base::operator[] template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::iterator vector_base ::begin(void) @@ -470,7 +470,7 @@ template } // end vector_base::begin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_iterator vector_base ::begin(void) const @@ -479,7 +479,7 @@ template } // end vector_base::begin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_iterator vector_base ::cbegin(void) const @@ -488,7 +488,7 @@ template } // end vector_base::cbegin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::reverse_iterator vector_base 
::rbegin(void) @@ -497,7 +497,7 @@ template } // end vector_base::rbegin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reverse_iterator vector_base ::rbegin(void) const @@ -506,7 +506,7 @@ template } // end vector_base::rbegin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reverse_iterator vector_base ::crbegin(void) const @@ -515,7 +515,7 @@ template } // end vector_base::crbegin() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::iterator vector_base ::end(void) @@ -526,7 +526,7 @@ template } // end vector_base::end() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_iterator vector_base ::end(void) const @@ -537,7 +537,7 @@ template } // end vector_base::end() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_iterator vector_base ::cend(void) const @@ -546,7 +546,7 @@ template } // end vector_base::cend() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::reverse_iterator vector_base ::rend(void) @@ -555,7 +555,7 @@ template } // end vector_base::rend() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reverse_iterator vector_base ::rend(void) const @@ -564,7 +564,7 @@ template } // end vector_base::rend() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reverse_iterator vector_base ::crend(void) const @@ -573,7 +573,7 @@ template } // end vector_base::crend() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reference vector_base ::front(void) const @@ -582,7 +582,7 @@ template } // end vector_base::front() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::reference vector_base ::front(void) @@ -591,7 +591,7 @@ template } // end vector_base::front() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_reference vector_base ::back(void) const @@ 
-602,7 +602,7 @@ template } // end vector_base::vector_base template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::reference vector_base ::back(void) @@ -613,7 +613,7 @@ template } // end vector_base::vector_base template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::pointer vector_base ::data(void) @@ -622,7 +622,7 @@ template } // end vector_base::data() template - __host__ __device__ + THRUST_HOST_DEVICE typename vector_base::const_pointer vector_base ::data(void) const @@ -647,7 +647,7 @@ template } // end vector_base::~vector_dev() template - __host__ __device__ + THRUST_HOST_DEVICE bool vector_base ::empty(void) const { diff --git a/thrust/device_allocator.h b/thrust/device_allocator.h index a964c321f..8d9bd1dab 100644 --- a/thrust/device_allocator.h +++ b/thrust/device_allocator.h @@ -53,7 +53,7 @@ class device_ptr_memory_resource final /*! Initialize the adaptor with the global instance of the upstream resource. Obtains * the global instance by calling \p get_global_resource. */ - __host__ + THRUST_HOST device_ptr_memory_resource() : m_upstream(mr::get_global_resource()) { } @@ -62,7 +62,7 @@ class device_ptr_memory_resource final * * \param upstream the upstream memory resource to adapt. */ - __host__ + THRUST_HOST device_ptr_memory_resource(Upstream * upstream) : m_upstream(upstream) { } @@ -73,7 +73,7 @@ class device_ptr_memory_resource final * \param alignment - alignment size, in bytes * \return a pointer to the newly allocated storage. 
*/ - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST virtual pointer do_allocate(std::size_t bytes, std::size_t alignment = THRUST_MR_DEFAULT_ALIGNMENT) override { return pointer(m_upstream->do_allocate(bytes, alignment).get()); @@ -84,7 +84,7 @@ class device_ptr_memory_resource final * \param bytes - size of the allocation, in bytes * \param alignment - alignment size, in bytes */ - __host__ + THRUST_HOST virtual void do_deallocate(pointer p, std::size_t bytes, std::size_t alignment) override { m_upstream->do_deallocate(upstream_ptr(p.get()), bytes, alignment); @@ -126,23 +126,23 @@ class device_allocator }; /*! Default constructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE device_allocator() {} /*! Copy constructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE device_allocator(const device_allocator& other) : base(other) {} /*! Constructor from other \p device_allocator has no effect. */ template - __host__ __device__ + THRUST_HOST_DEVICE device_allocator(const device_allocator& other) : base(other) {} /*! Use the default equality comparator. */ device_allocator & operator=(const device_allocator &) = default; /*! Destructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE ~device_allocator() {} }; diff --git a/thrust/device_make_unique.h b/thrust/device_make_unique.h index 60a487de8..fa6425a03 100644 --- a/thrust/device_make_unique.h +++ b/thrust/device_make_unique.h @@ -40,7 +40,7 @@ THRUST_NAMESPACE_BEGIN * memory. */ template -__host__ +THRUST_HOST auto device_make_unique(Args&&... args) THRUST_TRAILING_RETURN(decltype( uninitialized_allocate_unique(device_allocator{}) diff --git a/thrust/device_malloc_allocator.h b/thrust/device_malloc_allocator.h index a5478f6e8..ad2963fd5 100644 --- a/thrust/device_malloc_allocator.h +++ b/thrust/device_malloc_allocator.h @@ -90,20 +90,20 @@ template }; // end rebind /*! No-argument constructor has no effect. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE inline device_malloc_allocator() {} /*! No-argument destructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE inline ~device_malloc_allocator() {} /*! Copy constructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE inline device_malloc_allocator(device_malloc_allocator const&) {} /*! Constructor from other \p device_malloc_allocator has no effect. */ template - __host__ __device__ + THRUST_HOST_DEVICE inline device_malloc_allocator(device_malloc_allocator const&) {} #if THRUST_CPP_DIALECT >= 2011 @@ -114,13 +114,13 @@ template /*! Returns the address of an allocated object. * \return &r. */ - __host__ __device__ + THRUST_HOST_DEVICE inline pointer address(reference r) { return &r; } /*! Returns the address an allocated object. * \return &r. */ - __host__ __device__ + THRUST_HOST_DEVICE inline const_pointer address(const_reference r) { return &r; } /*! Allocates storage for \p cnt objects. @@ -128,7 +128,7 @@ template * \return A \p pointer to uninitialized storage for \p cnt objects. * \note Memory allocated by this function must be deallocated with \p deallocate. */ - __host__ + THRUST_HOST inline pointer allocate(size_type cnt, const_pointer = const_pointer(static_cast(0))) { @@ -146,7 +146,7 @@ template * \note Memory deallocated by this function must previously have been * allocated with \p allocate. */ - __host__ + THRUST_HOST inline void deallocate(pointer p, size_type cnt) { // silence unused parameter warning while still leaving the parameter name for Doxygen @@ -166,13 +166,13 @@ template /*! Compares against another \p device_malloc_allocator for equality. * \return \c true */ - __host__ __device__ + THRUST_HOST_DEVICE inline bool operator==(device_malloc_allocator const&) const { return true; } /*! Compares against another \p device_malloc_allocator for inequality. 
* \return \c false */ - __host__ __device__ + THRUST_HOST_DEVICE inline bool operator!=(device_malloc_allocator const &a) const {return !operator==(a); } }; // end device_malloc_allocator diff --git a/thrust/device_new_allocator.h b/thrust/device_new_allocator.h index 645be1c02..a4c415582 100644 --- a/thrust/device_new_allocator.h +++ b/thrust/device_new_allocator.h @@ -81,32 +81,32 @@ template }; // end rebind /*! No-argument constructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE inline device_new_allocator() {} /*! No-argument destructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE inline ~device_new_allocator() {} /*! Copy constructor has no effect. */ - __host__ __device__ + THRUST_HOST_DEVICE inline device_new_allocator(device_new_allocator const&) {} /*! Constructor from other \p device_malloc_allocator has no effect. */ template - __host__ __device__ + THRUST_HOST_DEVICE inline device_new_allocator(device_new_allocator const&) {} /*! Returns the address of an allocated object. * \return &r. */ - __host__ __device__ + THRUST_HOST_DEVICE inline pointer address(reference r) { return &r; } /*! Returns the address an allocated object. * \return &r. */ - __host__ __device__ + THRUST_HOST_DEVICE inline const_pointer address(const_reference r) { return &r; } /*! Allocates storage for \p cnt objects. @@ -114,7 +114,7 @@ template * \return A \p pointer to uninitialized storage for \p cnt objects. * \note Memory allocated by this function must be deallocated with \p deallocate. */ - __host__ + THRUST_HOST inline pointer allocate(size_type cnt, const_pointer = const_pointer(static_cast(0))) { @@ -133,7 +133,7 @@ template * \note Memory deallocated by this function must previously have been * allocated with \p allocate. */ - __host__ + THRUST_HOST inline void deallocate(pointer p, size_type cnt) { // use "::operator delete" rather than keyword delete @@ -144,7 +144,7 @@ template /*! 
Returns the largest value \c n for which allocate(n) might succeed. * \return The largest value \c n for which allocate(n) might succeed. */ - __host__ __device__ + THRUST_HOST_DEVICE inline size_type max_size() const { return std::numeric_limits::max THRUST_PREVENT_MACRO_SUBSTITUTION () / sizeof(T); @@ -153,13 +153,13 @@ template /*! Compares against another \p device_malloc_allocator for equality. * \return \c true */ - __host__ __device__ + THRUST_HOST_DEVICE inline bool operator==(device_new_allocator const&) { return true; } /*! Compares against another \p device_malloc_allocator for inequality. * \return \c false */ - __host__ __device__ + THRUST_HOST_DEVICE inline bool operator!=(device_new_allocator const &a) {return !operator==(a); } }; // end device_new_allocator diff --git a/thrust/device_ptr.h b/thrust/device_ptr.h index e710637bd..2bb07ad4e 100644 --- a/thrust/device_ptr.h +++ b/thrust/device_ptr.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2021 NVIDIA Corporation - * Modifications Copyright© 2019-2021 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -83,7 +83,7 @@ class device_ptr * * \post get() == nullptr. */ - __host__ __device__ + THRUST_HOST_DEVICE device_ptr() : super_t() {} /*! \brief Construct a null \c device_ptr. @@ -92,7 +92,7 @@ class device_ptr * * \post get() == nullptr. */ - __host__ __device__ + THRUST_HOST_DEVICE device_ptr(std::nullptr_t) : super_t(nullptr) {} /*! \brief Construct a \c device_ptr from a raw pointer which is @@ -108,7 +108,7 @@ class device_ptr * \post get() == nullptr. */ template - __host__ __device__ + THRUST_HOST_DEVICE explicit device_ptr(U* ptr) : super_t(ptr) {} /*! 
\brief Copy construct a \c device_ptr from another \c device_ptr whose @@ -122,7 +122,7 @@ class device_ptr * \post get() == other.get(). */ template - __host__ __device__ + THRUST_HOST_DEVICE device_ptr(device_ptr const& other) : super_t(other) {} /*! \brief Set this \c device_ptr to point to the same object as another @@ -138,7 +138,7 @@ class device_ptr * \return \c *this. */ template - __host__ __device__ + THRUST_HOST_DEVICE device_ptr &operator=(device_ptr const& other) { super_t::operator=(other); @@ -153,7 +153,7 @@ class device_ptr * * \return \c *this. */ - __host__ __device__ + THRUST_HOST_DEVICE device_ptr& operator=(std::nullptr_t) { super_t::operator=(nullptr); @@ -163,7 +163,7 @@ class device_ptr #if THRUST_DOXYGEN /*! \brief Return the raw pointer that this \c device_ptr points to. */ - __host__ __device__ + THRUST_HOST_DEVICE T* get() const; #endif }; @@ -177,7 +177,7 @@ class device_ptr * \return \c os. */ template -__host__ std::basic_ostream& +THRUST_HOST std::basic_ostream& operator<<(std::basic_ostream& os, device_ptr const& dp); #endif @@ -191,7 +191,7 @@ operator<<(std::basic_ostream& os, device_ptr const& dp); * \return A \c device_ptr pointing to \c ptr. */ template -__host__ __device__ +THRUST_HOST_DEVICE device_ptr device_pointer_cast(T* ptr); /*! \brief Create a \c device_ptr from another \c device_ptr. @@ -200,7 +200,7 @@ device_ptr device_pointer_cast(T* ptr); * \param dptr A \c device_ptr to a \c T. */ template -__host__ __device__ +THRUST_HOST_DEVICE device_ptr device_pointer_cast(device_ptr const& dptr); /*! \} // memory_management diff --git a/thrust/device_reference.h b/thrust/device_reference.h index 512ab4c60..4b25409dd 100644 --- a/thrust/device_reference.h +++ b/thrust/device_reference.h @@ -235,7 +235,7 @@ template * device_reference from device_reference. 
*/ template - __host__ __device__ + THRUST_HOST_DEVICE device_reference(const device_reference &other, typename thrust::detail::enable_if_convertible< typename device_reference::pointer, @@ -273,7 +273,7 @@ template * assert(ref == 13); * \endcode */ - __host__ __device__ + THRUST_HOST_DEVICE explicit device_reference(const pointer &ptr) : super_t(ptr) {} @@ -286,7 +286,7 @@ template * \return *this */ template - __host__ __device__ + THRUST_HOST_DEVICE device_reference &operator=(const device_reference &other) { return super_t::operator=(other); @@ -298,7 +298,7 @@ template * \param x The value to assign from. * \return *this */ - __host__ __device__ + THRUST_HOST_DEVICE device_reference &operator=(const value_type &x) { return super_t::operator=(x); @@ -314,7 +314,7 @@ template * \return A \p device_ptr pointing to the object this * \p device_reference references. */ - __host__ __device__ + THRUST_HOST_DEVICE pointer operator&(void) const; /*! Conversion operator converts this \p device_reference to T @@ -323,13 +323,13 @@ template * * \return A copy of the object referenced by this \p device_reference. */ - __host__ __device__ + THRUST_HOST_DEVICE operator value_type (void) const; /*! swaps the value this \p device_reference references with another. * \p other The other \p device_reference with which to swap. */ - __host__ __device__ + THRUST_HOST_DEVICE void swap(device_reference &other); /*! Prefix increment operator increments the object referenced by this @@ -961,7 +961,7 @@ template * \p y The second \p device_reference of interest. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE void swap(device_reference& x, device_reference& y) { x.swap(y); diff --git a/thrust/distance.h b/thrust/distance.h index 890879115..a8e9abbe0 100644 --- a/thrust/distance.h +++ b/thrust/distance.h @@ -62,7 +62,7 @@ THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/iterator/distance */ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last); diff --git a/thrust/equal.h b/thrust/equal.h index 2f3518907..f1415f64f 100644 --- a/thrust/equal.h +++ b/thrust/equal.h @@ -75,7 +75,7 @@ THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/algorithm/equal */ template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); @@ -153,7 +153,7 @@ bool equal(InputIterator1 first1, InputIterator1 last1, * * struct compare_modulo_two * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x, int y) const * { * return (x % 2) == (y % 2); @@ -171,7 +171,7 @@ bool equal(InputIterator1 first1, InputIterator1 last1, * \see https://en.cppreference.com/w/cpp/algorithm/equal */ template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); @@ -203,7 +203,7 @@ bool equal(const thrust::detail::execution_policy_base &exec, Inp * * struct compare_modulo_two * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x, int y) const * { * return (x % 2) == (y % 2); diff --git a/thrust/execution_policy.h b/thrust/execution_policy.h index 7e06e20e5..d363fdf70 100644 --- a/thrust/execution_policy.h +++ b/thrust/execution_policy.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 
Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -276,7 +276,7 @@ template * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); @@ -324,7 +324,7 @@ static const detail::host_t host; * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); @@ -364,7 +364,7 @@ THRUST_INLINE_CONSTANT detail::device_t device; * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); diff --git a/thrust/extrema.h b/thrust/extrema.h index ca419a0aa..8e0fbf84f 100644 --- a/thrust/extrema.h +++ b/thrust/extrema.h @@ -49,7 +49,7 @@ THRUST_NAMESPACE_BEGIN * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; @@ -69,7 +69,7 @@ THRUST_NAMESPACE_BEGIN * \see max */ template -__host__ __device__ +THRUST_HOST_DEVICE T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); @@ -98,7 +98,7 @@ __host__ __device__ * \see max */ template -__host__ __device__ +THRUST_HOST_DEVICE T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); @@ -125,7 +125,7 @@ __host__ __device__ * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; @@ -145,7 +145,7 @@ __host__ __device__ * \see min */ template -__host__ __device__ +THRUST_HOST_DEVICE T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); @@ -174,7 +174,7 @@ __host__ __device__ * \see min */ template -__host__ __device__ 
+THRUST_HOST_DEVICE T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); @@ -223,7 +223,7 @@ __host__ __device__ * \see https://en.cppreference.com/w/cpp/algorithm/min_element */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -307,7 +307,7 @@ ForwardIterator min_element(ForwardIterator first, ForwardIterator last); * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; @@ -326,7 +326,7 @@ ForwardIterator min_element(ForwardIterator first, ForwardIterator last); * \see https://en.cppreference.com/w/cpp/algorithm/min_element */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); @@ -367,7 +367,7 @@ ForwardIterator min_element(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -511,7 +511,7 @@ ForwardIterator max_element(ForwardIterator first, ForwardIterator last); * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; @@ -530,7 +530,7 @@ ForwardIterator max_element(ForwardIterator first, ForwardIterator last); * \see https://en.cppreference.com/w/cpp/algorithm/max_element */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); @@ -571,7 +571,7 @@ ForwardIterator max_element(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair 
minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -706,7 +706,7 @@ thrust::pair minmax_element(ForwardIterator fir * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; @@ -729,7 +729,7 @@ thrust::pair minmax_element(ForwardIterator fir * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf */ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); @@ -764,7 +764,7 @@ thrust::pair minmax_element(const thrust::detai * * struct compare_key_value * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(key_value lhs, key_value rhs) * { * return lhs.key < rhs.key; diff --git a/thrust/fill.h b/thrust/fill.h index 4f7f8e34a..fea961d4d 100644 --- a/thrust/fill.h +++ b/thrust/fill.h @@ -71,7 +71,7 @@ THRUST_NAMESPACE_BEGIN * \see \c uninitialized_fill */ template -__host__ __device__ +THRUST_HOST_DEVICE void fill(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -110,7 +110,7 @@ __host__ __device__ * \see \c uninitialized_fill */ template -__host__ __device__ +THRUST_HOST_DEVICE void fill(ForwardIterator first, ForwardIterator last, const T &value); @@ -153,7 +153,7 @@ __host__ __device__ * \see \c uninitialized_fill_n */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, OutputIterator first, Size n, @@ -192,7 +192,7 @@ __host__ __device__ * \see \c uninitialized_fill_n */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator fill_n(OutputIterator first, Size n, const T &value); diff --git a/thrust/find.h b/thrust/find.h index 5ab9b0a2d..185d3aac7 100644 --- a/thrust/find.h +++ b/thrust/find.h @@ -75,7 
+75,7 @@ THRUST_NAMESPACE_BEGIN * \see mismatch */ template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -146,7 +146,7 @@ InputIterator find(InputIterator first, * * struct greater_than_four * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 4; @@ -155,7 +155,7 @@ InputIterator find(InputIterator first, * * struct greater_than_ten * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 10; @@ -182,7 +182,7 @@ InputIterator find(InputIterator first, * \see mismatch */ template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -207,7 +207,7 @@ InputIterator find_if(const thrust::detail::execution_policy_base * * struct greater_than_four * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 4; @@ -216,7 +216,7 @@ InputIterator find_if(const thrust::detail::execution_policy_base * * struct greater_than_ten * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 10; @@ -272,7 +272,7 @@ InputIterator find_if(InputIterator first, * * struct greater_than_four * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 4; @@ -281,7 +281,7 @@ InputIterator find_if(InputIterator first, * * struct greater_than_ten * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x > 10; @@ -308,7 +308,7 @@ InputIterator find_if(InputIterator first, * \see mismatch */ template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -333,7 +333,7 @@ InputIterator find_if_not(const thrust::detail::execution_policy_base 4; @@ -342,7 +342,7 @@ InputIterator 
find_if_not(const thrust::detail::execution_policy_base 10; diff --git a/thrust/for_each.h b/thrust/for_each.h index 7d05e3ea1..182beac63 100644 --- a/thrust/for_each.h +++ b/thrust/for_each.h @@ -65,10 +65,10 @@ THRUST_NAMESPACE_BEGIN * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { - * // note that using printf in a __device__ function requires + * // note that using printf in a THRUST_DEVICE function requires * // code compiled for a GPU with compute capability 2.0 or * // higher (nvcc --arch=sm_20) * printf("%d\n", x); @@ -89,7 +89,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -128,10 +128,10 @@ InputIterator for_each(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, @@ -186,10 +186,10 @@ InputIterator for_each_n(const thrust::detail::execution_policy_base struct binary_traits; * \code * struct sine : public thrust::unary_function * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float operator()(float x) { return sinf(x); } * }; * \endcode @@ -92,7 +92,7 @@ struct unary_function * \code * struct exponentiate : public thrust::binary_function * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float operator()(float x, float y) { return powf(x,y); } * }; * \endcode @@ -145,9 +145,9 @@ struct binary_function /*! This functor is transparent. */ \ using is_transparent = void; \ /*! Function call operator. */ \ - __thrust_exec_check_disable__ \ + THRUST_EXEC_CHECK_DISABLE \ template \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ constexpr auto operator()(T&& x) const \ noexcept(noexcept(impl)) THRUST_TRAILING_RETURN(decltype(impl)) \ { \ @@ -162,9 +162,9 @@ struct binary_function /*! This functor is transparent. 
*/ \ using is_transparent = void; \ /*! Function call operator. */ \ - __thrust_exec_check_disable__ \ + THRUST_EXEC_CHECK_DISABLE \ template \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ constexpr auto operator()(T1&& t1, T2&& t2) const \ noexcept(noexcept(impl)) THRUST_TRAILING_RETURN(decltype(impl)) \ { \ @@ -230,8 +230,8 @@ struct plus /*! Function call operator. The return value is lhs + rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs + rhs; @@ -295,8 +295,8 @@ struct minus /*! Function call operator. The return value is lhs - rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs - rhs; @@ -360,8 +360,8 @@ struct multiplies /*! Function call operator. The return value is lhs * rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs * rhs; @@ -425,8 +425,8 @@ struct divides /*! Function call operator. The return value is lhs / rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs / rhs; @@ -490,8 +490,8 @@ struct modulus /*! Function call operator. The return value is lhs % rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs % rhs; @@ -547,8 +547,8 @@ struct negate /*! Function call operator. The return value is -x. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &x) const { return -x; @@ -603,8 +603,8 @@ struct square /*! 
Function call operator. The return value is x*x. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &x) const { return x*x; @@ -654,8 +654,8 @@ struct equal_to /*! Function call operator. The return value is lhs == rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs == rhs; @@ -697,8 +697,8 @@ struct not_equal_to /*! Function call operator. The return value is lhs != rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs != rhs; @@ -740,8 +740,8 @@ struct greater /*! Function call operator. The return value is lhs > rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs > rhs; @@ -783,8 +783,8 @@ struct less /*! Function call operator. The return value is lhs < rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs < rhs; @@ -826,8 +826,8 @@ struct greater_equal /*! Function call operator. The return value is lhs >= rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs >= rhs; @@ -869,8 +869,8 @@ struct less_equal /*! Function call operator. The return value is lhs <= rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs <= rhs; @@ -921,8 +921,8 @@ struct logical_and /*! Function call operator. 
The return value is lhs && rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs && rhs; @@ -964,8 +964,8 @@ struct logical_or /*! Function call operator. The return value is lhs || rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &lhs, const T &rhs) const { return lhs || rhs; @@ -1021,8 +1021,8 @@ struct logical_not /*! Function call operator. The return value is !x. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool operator()(const T &x) const { return !x; @@ -1093,8 +1093,8 @@ struct bit_and /*! Function call operator. The return value is lhs & rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs & rhs; @@ -1157,8 +1157,8 @@ struct bit_or /*! Function call operator. The return value is lhs | rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs | rhs; @@ -1221,8 +1221,8 @@ struct bit_xor /*! Function call operator. The return value is lhs ^ rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs ^ rhs; @@ -1276,8 +1276,8 @@ struct identity /*! Function call operator. The return value is x. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T &operator()(const T &x) const { return x; @@ -1332,8 +1332,8 @@ struct maximum /*! Function call operator. The return value is rhs < lhs ? lhs : rhs. 
*/ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs < rhs ? rhs : lhs; @@ -1390,8 +1390,8 @@ struct minimum /*! Function call operator. The return value is lhs < rhs ? lhs : rhs. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr T operator()(const T &lhs, const T &rhs) const { return lhs < rhs ? lhs : rhs; @@ -1442,7 +1442,7 @@ struct project1st /*! Function call operator. The return value is lhs. */ - __host__ __device__ + THRUST_HOST_DEVICE constexpr const T1 &operator()(const T1 &lhs, const T2 & /*rhs*/) const { return lhs; @@ -1458,9 +1458,9 @@ struct project1st /// converted to the required type, and uses perfect forwarding. using is_transparent = void; /// \brief Invocation operator - returns its first argument. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&&) const noexcept(noexcept(THRUST_FWD(t1))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t1))) @@ -1507,7 +1507,7 @@ struct project2nd /*! Function call operator. The return value is rhs. */ - __host__ __device__ + THRUST_HOST_DEVICE constexpr const T2 &operator()(const T1 &/*lhs*/, const T2 &rhs) const { return rhs; @@ -1523,9 +1523,9 @@ struct project2nd /// converted to the required type, and uses perfect forwarding. using is_transparent = void; /// \brief Invocation operator - returns its second argument. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto operator()(T1&&, T2&& t2) const noexcept(noexcept(THRUST_FWD(t2))) THRUST_TRAILING_RETURN(decltype(THRUST_FWD(t2))) @@ -1561,13 +1561,13 @@ struct unary_negate /*! Constructor takes a \p Predicate object to negate. * \param p The \p Predicate object to negate. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE explicit unary_negate(Predicate p) : pred(p){} /*! Function call operator. The return value is !pred(x). */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE bool operator()(const typename Predicate::argument_type& x) { return !pred(x); } /*! \cond @@ -1594,7 +1594,7 @@ struct unary_negate * \see not2 */ template - __host__ __device__ + THRUST_HOST_DEVICE unary_negate not1(const Predicate &pred); /*! \p binary_negate is a function object adaptor: it is an Adaptable Binary @@ -1616,13 +1616,13 @@ struct binary_negate /*! Constructor takes a \p Predicate object to negate. * \param p The \p Predicate object to negate. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit binary_negate(Predicate p) : pred(p){} /*! Function call operator. The return value is !pred(x,y). */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE bool operator()(const typename Predicate::first_argument_type& x, const typename Predicate::second_argument_type& y) { return !pred(x,y); @@ -1652,7 +1652,7 @@ struct binary_negate * \see not1 */ template - __host__ __device__ + THRUST_HOST_DEVICE binary_negate not2(const BinaryPredicate &pred); /*! \} diff --git a/thrust/future.h b/thrust/future.h index 7542b2fb6..8408f9fe3 100644 --- a/thrust/future.h +++ b/thrust/future.h @@ -79,14 +79,14 @@ namespace unimplemented struct no_unique_eager_event_type_found {}; -inline __host__ +inline THRUST_HOST no_unique_eager_event_type_found unique_eager_event_type(...) noexcept; struct no_unique_eager_future_type_found {}; template -__host__ +THRUST_HOST no_unique_eager_future_type_found unique_eager_future_type(...) 
noexcept; diff --git a/thrust/gather.h b/thrust/gather.h index 41acc22a3..2f3ae683d 100644 --- a/thrust/gather.h +++ b/thrust/gather.h @@ -84,7 +84,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather(const thrust::detail::execution_policy_base &exec, InputIterator map_first, InputIterator map_last, @@ -205,7 +205,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, InputIterator1 map_first, InputIterator1 map_last, @@ -315,7 +315,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, InputIterator1 map_first, InputIterator1 map_last, @@ -395,7 +395,7 @@ __host__ __device__ * * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; diff --git a/thrust/generate.h b/thrust/generate.h index d47295344..d554dad2f 100644 --- a/thrust/generate.h +++ b/thrust/generate.h @@ -70,7 +70,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE void generate(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -156,7 +156,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, OutputIterator first, Size n, diff --git a/thrust/host_vector.h b/thrust/host_vector.h index bb925ea9c..496b37844 100644 --- a/thrust/host_vector.h +++ b/thrust/host_vector.h @@ -66,14 +66,14 @@ template > /*! This constructor creates an empty \p host_vector. */ - __host__ + THRUST_HOST host_vector(void) :Parent() {} /*! This constructor creates an empty \p host_vector. * \param alloc The allocator to use by this host_vector. 
*/ - __host__ + THRUST_HOST host_vector(const Alloc &alloc) :Parent(alloc) {} @@ -81,14 +81,14 @@ template > */ // Define an empty destructor to explicitly specify // its execution space qualifier, as a workaround for nvcc warning - __host__ + THRUST_HOST ~host_vector(void) {} /*! This constructor creates a \p host_vector with the given * size. * \param n The number of elements to initially create. */ - __host__ + THRUST_HOST explicit host_vector(size_type n) :Parent(n) {} @@ -97,7 +97,7 @@ template > * \param n The number of elements to initially create. * \param alloc The allocator to use by this host_vector. */ - __host__ + THRUST_HOST explicit host_vector(size_type n, const Alloc &alloc) :Parent(n,alloc) {} @@ -106,7 +106,7 @@ template > * \param n The number of elements to initially create. * \param value An element to copy. */ - __host__ + THRUST_HOST explicit host_vector(size_type n, const value_type &value) :Parent(n,value) {} @@ -116,14 +116,14 @@ template > * \param value An element to copy. * \param alloc The allocator to use by this host_vector. */ - __host__ + THRUST_HOST explicit host_vector(size_type n, const value_type &value, const Alloc &alloc) :Parent(n,value,alloc) {} /*! Copy constructor copies from an exemplar \p host_vector. * \param v The \p host_vector to copy. */ - __host__ + THRUST_HOST host_vector(const host_vector &v) :Parent(v) {} @@ -131,7 +131,7 @@ template > * \param v The \p host_vector to copy. * \param alloc The allocator to use by this host_vector. */ - __host__ + THRUST_HOST host_vector(const host_vector &v, const Alloc &alloc) :Parent(v,alloc) {} @@ -139,7 +139,7 @@ template > /*! Move constructor moves from another host_vector. * \param v The host_vector to move. */ - __host__ + THRUST_HOST host_vector(host_vector &&v) :Parent(std::move(v)) {} @@ -147,7 +147,7 @@ template > * \param v The host_vector to move. * \param alloc The allocator to use by this host_vector. 
*/ - __host__ + THRUST_HOST host_vector(host_vector &&v, const Alloc &alloc) :Parent(std::move(v),alloc) {} #endif @@ -155,7 +155,7 @@ template > /*! Assign operator copies from an exemplar \p host_vector. * \param v The \p host_vector to copy. */ - __host__ + THRUST_HOST host_vector &operator=(const host_vector &v) { Parent::operator=(v); return *this; } @@ -163,7 +163,7 @@ template > /*! Move assign operator moves from another host_vector. * \param v The host_vector to move. */ - __host__ + THRUST_HOST host_vector &operator=(host_vector &&v) { Parent::operator=(std::move(v)); return *this; } #endif @@ -172,7 +172,7 @@ template > * \param v The \p host_vector to copy. */ template - __host__ + THRUST_HOST host_vector(const host_vector &v) :Parent(v) {} @@ -180,7 +180,7 @@ template > * \param v The \p host_vector to copy. */ template - __host__ + THRUST_HOST host_vector &operator=(const host_vector &v) { Parent::operator=(v); return *this; } @@ -188,7 +188,7 @@ template > * \param v The std::vector to copy. */ template - __host__ + THRUST_HOST host_vector(const std::vector &v) :Parent(v) {} @@ -196,7 +196,7 @@ template > * \param v The std::vector to copy. */ template - __host__ + THRUST_HOST host_vector &operator=(const std::vector &v) { Parent::operator=(v); return *this;} @@ -206,7 +206,7 @@ template > * \param v The \p vector_base to copy. */ template - __host__ + THRUST_HOST host_vector(const detail::vector_base &v) :Parent(v) {} @@ -215,7 +215,7 @@ template > * \param v The \p vector_base to copy. */ template - __host__ + THRUST_HOST host_vector &operator=(const detail::vector_base &v) { Parent::operator=(v); return *this; } @@ -243,7 +243,7 @@ template > * \param last The end of the range. */ template - __host__ + THRUST_HOST host_vector(InputIterator first, InputIterator last) :Parent(first, last) {} @@ -253,7 +253,7 @@ template > * \param alloc The allocator to use by this host_vector. 
*/ template - __host__ + THRUST_HOST host_vector(InputIterator first, InputIterator last, const Alloc &alloc) :Parent(first, last, alloc) {} diff --git a/thrust/inner_product.h b/thrust/inner_product.h index 80068cf0c..9050613b0 100644 --- a/thrust/inner_product.h +++ b/thrust/inner_product.h @@ -79,7 +79,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -187,7 +187,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/iterator/constant_iterator.h b/thrust/iterator/constant_iterator.h index 08f7e699b..747d75972 100644 --- a/thrust/iterator/constant_iterator.h +++ b/thrust/iterator/constant_iterator.h @@ -115,7 +115,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE constant_iterator(constant_iterator const &rhs, typename thrust::detail::enable_if_convertible< typename thrust::iterator_system >::type, @@ -151,7 +151,7 @@ templateIncrementable == int, \c 0. */ - __host__ __device__ + THRUST_HOST_DEVICE constant_iterator(value_type const& v, incrementable const &i = incrementable()) : super_t(base_iterator(i)), m_value(v) {} @@ -164,14 +164,14 @@ templateIncrementable == int, \c 0. */ template - __host__ __device__ + THRUST_HOST_DEVICE constant_iterator(OtherValue const& v, OtherIncrementable const& i = incrementable()) : super_t(base_iterator(i)), m_value(v) {} /*! This method returns the value of this \p constant_iterator's constant value. * \return A \c const reference to this \p constant_iterator's constant value. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE Value const& value() const { return m_value; } @@ -179,16 +179,16 @@ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE constant_iterator make_constant_iterator(ValueT x, IndexT i = int()) { return constant_iterator(x, i); @@ -234,7 +234,7 @@ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE constant_iterator make_constant_iterator(V x) { return constant_iterator(x, 0); diff --git a/thrust/iterator/counting_iterator.h b/thrust/iterator/counting_iterator.h index f66cb97ef..01f6a98c9 100644 --- a/thrust/iterator/counting_iterator.h +++ b/thrust/iterator/counting_iterator.h @@ -147,7 +147,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE counting_iterator(counting_iterator const &rhs, typename thrust::detail::enable_if_convertible< typename thrust::iterator_system >::type, @@ -178,7 +178,7 @@ template= 2011 @@ -188,7 +188,7 @@ templatebase_reference(); @@ -196,7 +196,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE bool equal(counting_iterator const& y) const { typedef thrust::detail::counting_iterator_equal e; @@ -204,7 +204,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE difference_type distance_to(counting_iterator const& y) const { @@ -230,7 +230,7 @@ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE counting_iterator make_counting_iterator(Incrementable x) { return counting_iterator(x); diff --git a/thrust/iterator/detail/any_assign.h b/thrust/iterator/detail/any_assign.h index 87192215c..d22280d0e 100644 --- a/thrust/iterator/detail/any_assign.h +++ b/thrust/iterator/detail/any_assign.h @@ -26,15 +26,15 @@ namespace detail // a type which may be assigned any other type struct any_assign { - inline __host__ __device__ any_assign() + inline THRUST_HOST_DEVICE any_assign() {} template - inline __host__ __device__ any_assign(T) + inline THRUST_HOST_DEVICE any_assign(T) {} template - inline __host__ __device__ + inline THRUST_HOST_DEVICE any_assign 
&operator=(T) { if(0) diff --git a/thrust/iterator/detail/counting_iterator.inl b/thrust/iterator/detail/counting_iterator.inl index ee4a9df15..6ec760864 100644 --- a/thrust/iterator/detail/counting_iterator.inl +++ b/thrust/iterator/detail/counting_iterator.inl @@ -86,7 +86,7 @@ template struct iterator_distance { - __host__ __device__ + THRUST_HOST_DEVICE static Difference distance(Incrementable1 x, Incrementable2 y) { return y - x; @@ -97,7 +97,7 @@ template template struct number_distance { - __host__ __device__ + THRUST_HOST_DEVICE static Difference distance(Incrementable1 x, Incrementable2 y) { return static_cast(numeric_distance(x,y)); @@ -108,7 +108,7 @@ template template struct counting_iterator_equal { - __host__ __device__ + THRUST_HOST_DEVICE static bool equal(Incrementable1 x, Incrementable2 y) { return x == y; @@ -128,7 +128,7 @@ template >::type > { - __host__ __device__ + THRUST_HOST_DEVICE static bool equal(Incrementable1 x, Incrementable2 y) { typedef number_distance d; diff --git a/thrust/iterator/detail/join_iterator.h b/thrust/iterator/detail/join_iterator.h index 83f143dc0..bc743d65f 100644 --- a/thrust/iterator/detail/join_iterator.h +++ b/thrust/iterator/detail/join_iterator.h @@ -78,7 +78,7 @@ class join_iterator typedef typename super_t::difference_type size_type; public: - inline __host__ __device__ + inline THRUST_HOST_DEVICE join_iterator(RandomAccessIterator1 first1, size_type n, RandomAccessIterator2 first2) : super_t(thrust::counting_iterator(0)), m_n1(n), @@ -87,7 +87,7 @@ class join_iterator {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE join_iterator(const join_iterator &other) : super_t(other), m_n1(other.m_n1), @@ -104,7 +104,7 @@ class join_iterator // See goo.gl/LELTNp THRUST_DISABLE_MSVC_WARNING_BEGIN(4172) - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::reference dereference() const { size_type i = *super_t::base(); @@ -121,7 +121,7 @@ class join_iterator template -__host__ __device__ 
+THRUST_HOST_DEVICE join_iterator make_join_iterator(RandomAccessIterator1 first1, Size n1, RandomAccessIterator2 first2) { return join_iterator(first1, n1, first2); diff --git a/thrust/iterator/detail/normal_iterator.h b/thrust/iterator/detail/normal_iterator.h index eb5d33604..8b8f7b180 100644 --- a/thrust/iterator/detail/normal_iterator.h +++ b/thrust/iterator/detail/normal_iterator.h @@ -43,15 +43,15 @@ template typedef iterator_adaptor, Pointer> super_t; public: - __host__ __device__ + THRUST_HOST_DEVICE normal_iterator() {} - __host__ __device__ + THRUST_HOST_DEVICE normal_iterator(Pointer p) : super_t(p) {} template - __host__ __device__ + THRUST_HOST_DEVICE normal_iterator(const normal_iterator &other, typename thrust::detail::enable_if_convertible< OtherPointer, @@ -63,7 +63,7 @@ template template - inline __host__ __device__ normal_iterator make_normal_iterator(Pointer ptr) + inline THRUST_HOST_DEVICE normal_iterator make_normal_iterator(Pointer ptr) { return normal_iterator(ptr); } diff --git a/thrust/iterator/detail/retag.h b/thrust/iterator/detail/retag.h index d277d8b6f..587fae697 100644 --- a/thrust/iterator/detail/retag.h +++ b/thrust/iterator/detail/retag.h @@ -50,7 +50,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE thrust::detail::tagged_iterator reinterpret_tag(Iterator iter) { @@ -60,7 +60,7 @@ __host__ __device__ // specialization for raw pointer template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pointer reinterpret_tag(T *ptr) { @@ -70,7 +70,7 @@ __host__ __device__ // specialization for thrust::pointer template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pointer reinterpret_tag(thrust::pointer ptr) { @@ -80,7 +80,7 @@ __host__ __device__ // avoid deeply-nested tagged_iterator template -__host__ __device__ +THRUST_HOST_DEVICE thrust::detail::tagged_iterator reinterpret_tag(thrust::detail::tagged_iterator iter) { @@ -89,7 +89,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename 
thrust::detail::enable_if_retaggable< typename thrust::iterator_system::type, Tag, @@ -103,7 +103,7 @@ __host__ __device__ // specialization for raw pointer template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if_retaggable< typename thrust::iterator_system::type, Tag, @@ -117,7 +117,7 @@ __host__ __device__ // specialization for thrust::pointer template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if_retaggable< OtherTag, Tag, @@ -131,7 +131,7 @@ __host__ __device__ // avoid deeply-nested tagged_iterator template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if_retaggable< OtherTag, Tag, diff --git a/thrust/iterator/detail/reverse_iterator.inl b/thrust/iterator/detail/reverse_iterator.inl index 2f2320e38..e27235439 100644 --- a/thrust/iterator/detail/reverse_iterator.inl +++ b/thrust/iterator/detail/reverse_iterator.inl @@ -26,9 +26,9 @@ THRUST_NAMESPACE_BEGIN namespace detail { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE Iterator prior(Iterator x) { return --x; @@ -37,7 +37,7 @@ template } // end detail template - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator ::reverse_iterator(BidirectionalIterator x) :super_t(x) @@ -46,7 +46,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator ::reverse_iterator(reverse_iterator const &r // XXX msvc screws this up @@ -64,7 +64,7 @@ template } // end reverse_iterator::reverse_iterator() template - __host__ __device__ + THRUST_HOST_DEVICE typename reverse_iterator::super_t::reference reverse_iterator ::dereference() const @@ -73,7 +73,7 @@ template } // end reverse_iterator::increment() template - __host__ __device__ + THRUST_HOST_DEVICE void reverse_iterator ::increment() { @@ -81,7 +81,7 @@ template } // end reverse_iterator::increment() template - __host__ __device__ + THRUST_HOST_DEVICE void reverse_iterator ::decrement() { @@ 
-89,7 +89,7 @@ template } // end reverse_iterator::decrement() template - __host__ __device__ + THRUST_HOST_DEVICE void reverse_iterator ::advance(typename super_t::difference_type n) { @@ -98,7 +98,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename reverse_iterator::super_t::difference_type reverse_iterator ::distance_to(reverse_iterator const &y) const @@ -107,7 +107,7 @@ template } // end reverse_iterator::distance_to() template -__host__ __device__ +THRUST_HOST_DEVICE reverse_iterator make_reverse_iterator(BidirectionalIterator x) { return reverse_iterator(x); diff --git a/thrust/iterator/detail/tagged_iterator.h b/thrust/iterator/detail/tagged_iterator.h index 24cbbb736..f2cc8aa59 100644 --- a/thrust/iterator/detail/tagged_iterator.h +++ b/thrust/iterator/detail/tagged_iterator.h @@ -50,10 +50,10 @@ template typedef typename tagged_iterator_base::type super_t; public: - __host__ __device__ + THRUST_HOST_DEVICE tagged_iterator() {} - __host__ __device__ + THRUST_HOST_DEVICE explicit tagged_iterator(Iterator x) : super_t(x) {} }; // end tagged_iterator diff --git a/thrust/iterator/detail/transform_input_output_iterator.inl b/thrust/iterator/detail/transform_input_output_iterator.inl index e7aa0b7bb..a6c906d36 100644 --- a/thrust/iterator/detail/transform_input_output_iterator.inl +++ b/thrust/iterator/detail/transform_input_output_iterator.inl @@ -1,6 +1,6 @@ /* * Copyright 2020-2021 NVIDIA Corporation - * Modifications Copyright© 2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2023-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,7 @@ template using Value = invoke_result_t; public: - __host__ __device__ + THRUST_HOST_DEVICE transform_input_output_iterator_proxy(const Iterator& io, InputFunction input_function, OutputFunction output_function) : io(io), input_function(input_function), output_function(output_function) { @@ -47,24 +47,24 @@ template transform_input_output_iterator_proxy(const transform_input_output_iterator_proxy&) = default; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE operator Value const() const { return input_function(*io); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE transform_input_output_iterator_proxy operator=(const T& x) { *io = output_function(x); return *this; } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE transform_input_output_iterator_proxy operator=(const transform_input_output_iterator_proxy& x) { *io = output_function(x); diff --git a/thrust/iterator/detail/transform_output_iterator.inl b/thrust/iterator/detail/transform_output_iterator.inl index ff0299e0d..0208f0681 100644 --- a/thrust/iterator/detail/transform_output_iterator.inl +++ b/thrust/iterator/detail/transform_output_iterator.inl @@ -35,14 +35,14 @@ template class transform_output_iterator_proxy { public: - __host__ __device__ + THRUST_HOST_DEVICE transform_output_iterator_proxy(const OutputIterator& out, UnaryFunction fun) : out(out), fun(fun) { } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE transform_output_iterator_proxy operator=(const T& x) { *out = fun(x); diff --git a/thrust/iterator/detail/tuple_of_iterator_references.h b/thrust/iterator/detail/tuple_of_iterator_references.h index 2fc9effbb..0b46b111e 100644 --- a/thrust/iterator/detail/tuple_of_iterator_references.h +++ b/thrust/iterator/detail/tuple_of_iterator_references.h @@ 
-42,27 +42,27 @@ template< using super_t = thrust::tuple; using super_t::super_t; - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references() : super_t() {} // allow implicit construction from tuple - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references(const super_t& other) : super_t(other) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references(super_t&& other) : super_t(::cuda::std::move(other)) {} // allow assignment from tuples // XXX might be worthwhile to guard this with an enable_if is_assignable - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references &operator=(const thrust::tuple &other) { super_t::operator=(other); @@ -71,9 +71,9 @@ template< // allow assignment from pairs // XXX might be worthwhile to guard this with an enable_if is_assignable - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references &operator=(const thrust::pair &other) { super_t::operator=(other); @@ -83,9 +83,9 @@ template< // allow assignment from reference // XXX perhaps we should generalize to reference // we could captures reference this way - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references& operator=(const thrust::reference, Pointer, Derived> &other) { @@ -97,7 +97,7 @@ template< } template = 0> - inline __host__ __device__ + inline THRUST_HOST_DEVICE constexpr operator thrust::tuple() const { return to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); } @@ -105,7 +105,7 @@ template< // this overload of swap() permits swapping tuple_of_iterator_references returned as temporaries from // iterator dereferences template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend void 
swap(tuple_of_iterator_references&& x, tuple_of_iterator_references&& y) { x.swap(y); @@ -113,7 +113,7 @@ template< private: template - inline __host__ __device__ + inline THRUST_HOST_DEVICE constexpr thrust::tuple to_tuple(::cuda::std::__tuple_indices) const { return {get(*this)...}; } @@ -171,16 +171,16 @@ template< public: // allow implicit construction from tuple - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references(const super_t &other) : super_t(other) {} // allow assignment from tuples // XXX might be worthwhile to guard this with an enable_if is_assignable - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references &operator=(const thrust::tuple &other) { super_t::operator=(other); @@ -189,9 +189,9 @@ template< // allow assignment from pairs // XXX might be worthwhile to guard this with an enable_if is_assignable - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references &operator=(const thrust::pair &other) { super_t::operator=(other); @@ -201,10 +201,10 @@ template< // allow assignment from reference // XXX perhaps we should generalize to reference // we could captures reference this way - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references & operator=(const thrust::reference, Pointer, Derived> &other) { @@ -218,10 +218,10 @@ template< // duplicate thrust::tuple's constructors - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references() {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple_of_iterator_references(typename access_traits::parameter_type... ts) : super_t(ts...) {} @@ -234,7 +234,7 @@ template< typename... Ts, typename... 
Us > -inline __host__ __device__ +inline THRUST_HOST_DEVICE void swap(tuple_of_iterator_references x, tuple_of_iterator_references y) { diff --git a/thrust/iterator/detail/zip_iterator.inl b/thrust/iterator/detail/zip_iterator.inl index a2bc98afe..36a0ebf7c 100644 --- a/thrust/iterator/detail/zip_iterator.inl +++ b/thrust/iterator/detail/zip_iterator.inl @@ -25,7 +25,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE zip_iterator ::zip_iterator() { @@ -33,7 +33,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE zip_iterator ::zip_iterator(IteratorTuple iterator_tuple) :m_iterator_tuple(iterator_tuple) @@ -43,7 +43,7 @@ __host__ __device__ template template - __host__ __device__ + THRUST_HOST_DEVICE zip_iterator ::zip_iterator(const zip_iterator &other, typename thrust::detail::enable_if_convertible< @@ -56,7 +56,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE const IteratorTuple &zip_iterator ::get_iterator_tuple() const { @@ -66,7 +66,7 @@ const IteratorTuple &zip_iterator template typename zip_iterator::super_t::reference - __host__ __device__ + THRUST_HOST_DEVICE zip_iterator ::dereference() const { @@ -78,10 +78,10 @@ template } // end zip_iterator::dereference() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template template - __host__ __device__ + THRUST_HOST_DEVICE bool zip_iterator ::equal(const zip_iterator &other) const { @@ -90,7 +90,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE void zip_iterator ::advance(typename super_t::difference_type n) { @@ -101,7 +101,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void zip_iterator ::increment() { @@ -111,7 +111,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void zip_iterator ::decrement() { @@ -120,10 +120,10 @@ __host__ __device__ } // end zip_iterator::decrement() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template template - __host__ __device__ 
+ THRUST_HOST_DEVICE typename zip_iterator::super_t::difference_type zip_iterator ::distance_to(const zip_iterator &other) const @@ -133,7 +133,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE zip_iterator> make_zip_iterator(thrust::tuple t) { return zip_iterator>(t); @@ -141,7 +141,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE zip_iterator> make_zip_iterator(Iterators... its) { return make_zip_iterator(thrust::make_tuple(its...)); diff --git a/thrust/iterator/detail/zip_iterator_base.h b/thrust/iterator/detail/zip_iterator_base.h index 2e80ae9ec..3a5f88f42 100644 --- a/thrust/iterator/detail/zip_iterator_base.h +++ b/thrust/iterator/detail/zip_iterator_base.h @@ -46,12 +46,12 @@ template class advance_iterator { public: - inline __host__ __device__ + inline THRUST_HOST_DEVICE advance_iterator(DiffType step) : m_step(step) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(Iterator& it) const { thrust::advance(it, m_step); } @@ -62,9 +62,9 @@ class advance_iterator struct increment_iterator { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(Iterator& it) { ++it; } }; // end increment_iterator @@ -72,9 +72,9 @@ struct increment_iterator struct decrement_iterator { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void operator()(Iterator& it) { --it; } }; // end decrement_iterator @@ -90,10 +90,10 @@ struct dereference_iterator type; }; // end apply - // XXX silence warnings of the form "calling a __host__ function from a __host__ __device__ function is not allowed - __thrust_exec_check_disable__ + // XXX silence warnings of the form "calling a __host__ function from a THRUST_HOST_DEVICE function is not allowed + THRUST_EXEC_CHECK_DISABLE template - __host__ 
__device__ + THRUST_HOST_DEVICE typename apply::type operator()(Iterator const& it) { return *it; @@ -160,14 +160,14 @@ template< template -inline __host__ __device__ +inline THRUST_HOST_DEVICE Fun tuple_for_each_helper(Fun f) { return f; } template -inline __host__ __device__ +inline THRUST_HOST_DEVICE Fun tuple_for_each_helper(Fun f, T& t, Ts&... ts) { f(t); @@ -177,7 +177,7 @@ Fun tuple_for_each_helper(Fun f, T& t, Ts&... ts) // for_each algorithm for tuples. template -inline __host__ __device__ +inline THRUST_HOST_DEVICE Fun tuple_for_each(thrust::tuple& t, Fun f, thrust::index_sequence) { return tuple_for_each_helper(f, thrust::get(t)...); @@ -185,7 +185,7 @@ Fun tuple_for_each(thrust::tuple& t, Fun f, thrust::index_sequence // for_each algorithm for tuples. template -inline __host__ __device__ +inline THRUST_HOST_DEVICE Fun tuple_for_each(thrust::tuple& t, Fun f) { return tuple_for_each(t, f, thrust::make_index_sequence>::value>{}); diff --git a/thrust/iterator/discard_iterator.h b/thrust/iterator/discard_iterator.h index 3db170209..af4a94960 100644 --- a/thrust/iterator/discard_iterator.h +++ b/thrust/iterator/discard_iterator.h @@ -111,7 +111,7 @@ template * * \p rhs The discard_iterator to copy. */ - __host__ __device__ + THRUST_HOST_DEVICE discard_iterator(discard_iterator const &rhs) : super_t(rhs.base()) {} @@ -126,7 +126,7 @@ template * value returned by \c Incrementable's null constructor. For example, * when Incrementable == int, \c 0. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE discard_iterator(incrementable const &i = incrementable()) : super_t(base_iterator(i)) {} @@ -134,7 +134,7 @@ template */ private: // Core iterator interface - __host__ __device__ + THRUST_HOST_DEVICE reference dereference() const { return m_element; @@ -156,7 +156,7 @@ template * * \see constant_iterator */ -inline __host__ __device__ +inline THRUST_HOST_DEVICE discard_iterator<> make_discard_iterator(discard_iterator<>::difference_type i = discard_iterator<>::difference_type(0)) { return discard_iterator<>(i); diff --git a/thrust/iterator/iterator_adaptor.h b/thrust/iterator/iterator_adaptor.h index 36ba0f63f..dee6f248c 100644 --- a/thrust/iterator/iterator_adaptor.h +++ b/thrust/iterator/iterator_adaptor.h @@ -75,7 +75,7 @@ THRUST_NAMESPACE_BEGIN * Iterator * > super_t; * - * __host__ __device__ + * THRUST_HOST_DEVICE * repeat_iterator(const Iterator &x, int n) : super_t(x), begin(x), n(n) {} * * // befriend thrust::iterator_core_access to allow it access to the private interface below @@ -89,7 +89,7 @@ THRUST_NAMESPACE_BEGIN * const Iterator begin; * * // it is private because only thrust::iterator_core_access needs access to it - * __host__ __device__ + * THRUST_HOST_DEVICE * typename super_t::reference dereference() const * { * return *(begin + (this->base() - begin) / n); @@ -142,8 +142,8 @@ template - __host__ __device__ + THRUST_HOST_DEVICE bool equal(iterator_adaptor const& x) const { return m_iterator == x.base(); } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void advance(typename iterator_adaptor::difference_type n) { // XXX statically assert on random_access_traversal_tag @@ -205,22 +205,22 @@ template(m_iterator + n); } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void increment() { ++m_iterator; } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + 
THRUST_HOST_DEVICE void decrement() { // XXX statically assert on bidirectional_traversal_tag --m_iterator; } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE typename iterator_adaptor::difference_type distance_to(iterator_adaptor const& y) const { return y.base() - m_iterator; } diff --git a/thrust/iterator/iterator_facade.h b/thrust/iterator/iterator_facade.h index f6920c5c8..40ba4d70d 100644 --- a/thrust/iterator/iterator_facade.h +++ b/thrust/iterator/iterator_facade.h @@ -68,42 +68,42 @@ class iterator_core_access // iterator comparisons are our friends template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator ==(iterator_facade const& lhs, iterator_facade const& rhs); template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator !=(iterator_facade const& lhs, iterator_facade const& rhs); template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator <(iterator_facade const& lhs, iterator_facade const& rhs); template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator >(iterator_facade const& lhs, iterator_facade const& rhs); template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator <=(iterator_facade const& lhs, iterator_facade const& rhs); template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend bool operator >=(iterator_facade const& lhs, iterator_facade const& rhs); @@ -111,7 +111,7 @@ class iterator_core_access // iterator difference is our friend template - inline __host__ __device__ + inline THRUST_HOST_DEVICE friend typename thrust::detail::distance_from_result< iterator_facade, @@ -121,28 +121,28 @@ class iterator_core_access iterator_facade const& rhs); template - __host__ __device__ + THRUST_HOST_DEVICE static typename Facade::reference dereference(Facade const& f) { return f.dereference(); } template - __host__ __device__ + 
THRUST_HOST_DEVICE static void increment(Facade& f) { f.increment(); } template - __host__ __device__ + THRUST_HOST_DEVICE static void decrement(Facade& f) { f.decrement(); } template - __host__ __device__ + THRUST_HOST_DEVICE static bool equal(Facade1 const& f1, Facade2 const& f2) { return f1.equal(f2); @@ -150,21 +150,21 @@ class iterator_core_access // XXX TODO: Investigate whether we need both of these cases //template - //__host__ __device__ + //THRUST_HOST_DEVICE //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::true_) //{ // return f1.equal(f2); //} //template - //__host__ __device__ + //THRUST_HOST_DEVICE //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::false_) //{ // return f2.equal(f1); //} template - __host__ __device__ + THRUST_HOST_DEVICE static void advance(Facade& f, typename Facade::difference_type n) { f.advance(n); @@ -173,7 +173,7 @@ class iterator_core_access // Facade2 is convertible to Facade1, // so return Facade1's difference_type template - __host__ __device__ + THRUST_HOST_DEVICE static typename Facade1::difference_type distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::true_type) { @@ -183,7 +183,7 @@ class iterator_core_access // Facade2 is not convertible to Facade1, // so return Facade2's difference_type template - __host__ __device__ + THRUST_HOST_DEVICE static typename Facade2::difference_type distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::false_type) { @@ -191,7 +191,7 @@ class iterator_core_access } template - __host__ __device__ + THRUST_HOST_DEVICE static typename thrust::detail::distance_from_result::type distance_from(Facade1 const& f1, Facade2 const& f2) { @@ -205,14 +205,14 @@ class iterator_core_access // Curiously Recurring Template interface. 
// template - __host__ __device__ + THRUST_HOST_DEVICE static Derived& derived(iterator_facade& facade) { return *static_cast(&facade); } template - __host__ __device__ + THRUST_HOST_DEVICE static Derived const& derived(iterator_facade const& facade) { return *static_cast(&facade); @@ -257,13 +257,13 @@ template(this); } - __host__ __device__ + THRUST_HOST_DEVICE Derived const& derived() const { return *static_cast(this); @@ -303,7 +303,7 @@ templatederived()); @@ -321,7 +321,7 @@ templatederived() + n); @@ -330,7 +330,7 @@ template*this */ - __host__ __device__ + THRUST_HOST_DEVICE Derived& operator++() { iterator_core_access::increment(this->derived()); @@ -340,7 +340,7 @@ template*this before increment. */ - __host__ __device__ + THRUST_HOST_DEVICE Derived operator++(int) { Derived tmp(this->derived()); @@ -351,7 +351,7 @@ template*this */ - __host__ __device__ + THRUST_HOST_DEVICE Derived& operator--() { iterator_core_access::decrement(this->derived()); @@ -361,7 +361,7 @@ template*this before decrement. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE Derived operator--(int) { Derived tmp(this->derived()); @@ -373,7 +373,7 @@ template*this */ - __host__ __device__ + THRUST_HOST_DEVICE Derived& operator+=(difference_type n) { iterator_core_access::advance(this->derived(), n); @@ -384,7 +384,7 @@ template*this */ - __host__ __device__ + THRUST_HOST_DEVICE Derived& operator-=(difference_type n) { iterator_core_access::advance(this->derived(), -n); @@ -395,7 +395,7 @@ templatederived()); @@ -409,7 +409,7 @@ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -423,7 +423,7 @@ operator ==(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -437,7 +437,7 @@ operator !=(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -451,7 +451,7 @@ operator <(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -465,7 +465,7 @@ operator >(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -479,7 +479,7 @@ operator <=(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // XXX it might be nice to implement this at some point //typename enable_if_interoperable::type // exposition bool @@ -494,7 +494,7 @@ operator >=(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE // divine the type this operator returns typename thrust::detail::distance_from_result< @@ -512,7 +512,7 @@ 
operator-(iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE Derived operator+ (iterator_facade const& i, typename Derived::difference_type n) { @@ -521,7 +521,7 @@ Derived operator+ (iterator_facade -inline __host__ __device__ +inline THRUST_HOST_DEVICE Derived operator+ (typename Derived::difference_type n, iterator_facade const& i) { diff --git a/thrust/iterator/permutation_iterator.h b/thrust/iterator/permutation_iterator.h index 60e9c8e8a..e0a1a9526 100644 --- a/thrust/iterator/permutation_iterator.h +++ b/thrust/iterator/permutation_iterator.h @@ -144,7 +144,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE permutation_iterator(permutation_iterator const &r // XXX remove these guards when we have static_assert , typename detail::enable_if_convertible::type* = 0 @@ -169,8 +169,8 @@ template base()); @@ -198,7 +198,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE permutation_iterator make_permutation_iterator(ElementIterator e, IndexIterator i) { return permutation_iterator(e,i); diff --git a/thrust/iterator/retag.h b/thrust/iterator/retag.h index 1eb770ae3..166c78106 100644 --- a/thrust/iterator/retag.h +++ b/thrust/iterator/retag.h @@ -42,7 +42,7 @@ THRUST_NAMESPACE_BEGIN * \see retag */ template -__host__ __device__ +THRUST_HOST_DEVICE unspecified_iterator_type reinterpret_tag(Iterator iter); /*! \p retag returns a copy of an iterator and changes the type of the result's system tag. @@ -57,7 +57,7 @@ unspecified_iterator_type reinterpret_tag(Iterator iter); * \see reinterpret_tag */ template -__host__ __device__ +THRUST_HOST_DEVICE unspecified_iterator_type retag(Iterator iter); #endif diff --git a/thrust/iterator/reverse_iterator.h b/thrust/iterator/reverse_iterator.h index 87b0cc989..a642379c2 100644 --- a/thrust/iterator/reverse_iterator.h +++ b/thrust/iterator/reverse_iterator.h @@ -158,7 +158,7 @@ template public: /*! Default constructor does nothing. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator() {} /*! \p Constructor accepts a \c BidirectionalIterator pointing to a range @@ -166,7 +166,7 @@ template * * \param x A \c BidirectionalIterator pointing to a range to reverse. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit reverse_iterator(BidirectionalIterator x); /*! \p Copy constructor allows construction from a related compatible @@ -175,7 +175,7 @@ template * \param r A \p reverse_iterator to copy from. */ template - __host__ __device__ + THRUST_HOST_DEVICE reverse_iterator(reverse_iterator const &r // XXX msvc screws this up // XXX remove these guards when we have static_assert @@ -192,21 +192,21 @@ template /*! \cond */ private: - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE typename super_t::reference dereference() const; - __host__ __device__ + THRUST_HOST_DEVICE void increment(); - __host__ __device__ + THRUST_HOST_DEVICE void decrement(); - __host__ __device__ + THRUST_HOST_DEVICE void advance(typename super_t::difference_type n); template - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::difference_type distance_to(reverse_iterator const &y) const; /*! \endcond @@ -221,7 +221,7 @@ template * \return A new \p reverse_iterator which reverses the range \p x. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE reverse_iterator make_reverse_iterator(BidirectionalIterator x); diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h index d287123e8..49dc85521 100644 --- a/thrust/iterator/transform_input_output_iterator.h +++ b/thrust/iterator/transform_input_output_iterator.h @@ -112,7 +112,7 @@ template * \param input_function An \c InputFunction to be executed on values read from the iterator * \param output_function An \c OutputFunction to be executed on values written to the iterator */ - __host__ __device__ + THRUST_HOST_DEVICE transform_input_output_iterator(Iterator const& io, InputFunction input_function, OutputFunction output_function) : super_t(io), input_function(input_function), output_function(output_function) { @@ -122,7 +122,7 @@ template */ private: - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::reference dereference() const { return detail::transform_input_output_iterator_proxy< @@ -148,7 +148,7 @@ template */ template transform_input_output_iterator -__host__ __device__ +THRUST_HOST_DEVICE make_transform_input_output_iterator(Iterator io, InputFunction input_function, OutputFunction output_function) { return transform_input_output_iterator(io, input_function, output_function); diff --git a/thrust/iterator/transform_iterator.h b/thrust/iterator/transform_iterator.h index 5424c15b2..c049aa1a1 100644 --- a/thrust/iterator/transform_iterator.h +++ b/thrust/iterator/transform_iterator.h @@ -69,7 +69,7 @@ THRUST_NAMESPACE_BEGIN * // note: functor inherits from unary_function * struct square_root : public thrust::unary_function * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float operator()(float x) const * { * return sqrtf(x); @@ -112,7 +112,7 @@ THRUST_NAMESPACE_BEGIN * // note: functor inherits from unary_function * struct square : public thrust::unary_function * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float 
operator()(float x) const * { * return x * x; @@ -153,7 +153,7 @@ THRUST_NAMESPACE_BEGIN * // note: functor *does not* inherit from unary_function * struct square_root * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float operator()(float x) const * { * return sqrtf(x); @@ -217,7 +217,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE transform_iterator(const transform_iterator &other, typename thrust::detail::enable_if_convertible::type* = 0, typename thrust::detail::enable_if_convertible::type* = 0) @@ -256,7 +256,7 @@ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE transform_iterator make_transform_iterator(Iterator it, AdaptableUnaryFunction fun) { diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h index 514f732f2..62e3f306e 100644 --- a/thrust/iterator/transform_output_iterator.h +++ b/thrust/iterator/transform_output_iterator.h @@ -55,7 +55,7 @@ THRUST_NAMESPACE_BEGIN * // note: functor inherits from unary_function * struct square_root : public thrust::unary_function * { - * __host__ __device__ + * THRUST_HOST_DEVICE * float operator()(float x) const * { * return sqrtf(x); @@ -114,7 +114,7 @@ template * \param fun An \c UnaryFunction used to transform the objects assigned to * this \p transform_output_iterator. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE transform_output_iterator(OutputIterator const& out, UnaryFunction fun) : super_t(out), fun(fun) { } @@ -123,7 +123,7 @@ template */ private: - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::reference dereference() const { return detail::transform_output_iterator_proxy< @@ -148,7 +148,7 @@ template */ template transform_output_iterator -__host__ __device__ +THRUST_HOST_DEVICE make_transform_output_iterator(OutputIterator out, UnaryFunction fun) { return transform_output_iterator(out, fun); diff --git a/thrust/iterator/zip_iterator.h b/thrust/iterator/zip_iterator.h index c2dd5ddc4..a718cd4c4 100644 --- a/thrust/iterator/zip_iterator.h +++ b/thrust/iterator/zip_iterator.h @@ -142,7 +142,7 @@ template public: /*! Null constructor does nothing. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE zip_iterator(); /*! This constructor creates a new \p zip_iterator from a @@ -150,7 +150,7 @@ template * * \param iterator_tuple The \p tuple of iterators to copy from. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE zip_iterator(IteratorTuple iterator_tuple); /*! This copy constructor creates a new \p zip_iterator from another @@ -159,7 +159,7 @@ template * \param other The \p zip_iterator to copy. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE zip_iterator(const zip_iterator &other, typename thrust::detail::enable_if_convertible< OtherIteratorTuple, @@ -172,7 +172,7 @@ template * \return A \c const reference to this \p zip_iterator's \p tuple * of iterators. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE const IteratorTuple &get_iterator_tuple() const; /*! \cond @@ -185,31 +185,31 @@ template // Dereferencing returns a tuple built from the dereferenced // iterators in the iterator tuple. - __host__ __device__ + THRUST_HOST_DEVICE typename super_t::reference dereference() const; // Two zip_iterators are equal if the two first iterators of the // tuple are equal. 
Note this differs from Boost's implementation, which // considers the entire tuple. template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool equal(const zip_iterator &other) const; // Advancing a zip_iterator means to advance all iterators in the tuple - inline __host__ __device__ + inline THRUST_HOST_DEVICE void advance(typename super_t::difference_type n); // Incrementing a zip iterator means to increment all iterators in the tuple - inline __host__ __device__ + inline THRUST_HOST_DEVICE void increment(); // Decrementing a zip iterator means to decrement all iterators in the tuple - inline __host__ __device__ + inline THRUST_HOST_DEVICE void decrement(); // Distance is calculated using the first iterator in the tuple. template - inline __host__ __device__ + inline THRUST_HOST_DEVICE typename super_t::difference_type distance_to(const zip_iterator &other) const; @@ -229,7 +229,7 @@ template * \see zip_iterator */ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE zip_iterator> make_zip_iterator(thrust::tuple t); @@ -242,7 +242,7 @@ zip_iterator> make_zip_iterator(thrust::tuple -inline __host__ __device__ +inline THRUST_HOST_DEVICE zip_iterator> make_zip_iterator(Iterators... 
its); diff --git a/thrust/logical.h b/thrust/logical.h index 5a8dbbecf..6eb81bc0a 100644 --- a/thrust/logical.h +++ b/thrust/logical.h @@ -71,7 +71,7 @@ THRUST_NAMESPACE_BEGIN * \see transform_reduce */ template -__host__ __device__ +THRUST_HOST_DEVICE bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); @@ -148,7 +148,7 @@ bool all_of(InputIterator first, InputIterator last, Predicate pred); * \see transform_reduce */ template -__host__ __device__ +THRUST_HOST_DEVICE bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); @@ -226,7 +226,7 @@ bool any_of(InputIterator first, InputIterator last, Predicate pred); * \see transform_reduce */ template -__host__ __device__ +THRUST_HOST_DEVICE bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); diff --git a/thrust/memory.h b/thrust/memory.h index 12425c39f..8d52dffc8 100644 --- a/thrust/memory.h +++ b/thrust/memory.h @@ -84,7 +84,7 @@ templatepointer from a T*. @@ -93,7 +93,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE explicit pointer(OtherElement *ptr); /*! This contructor allows initialization from another pointer-like object. 
@@ -104,7 +104,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE pointer(const OtherPointer &other, typename thrust::detail::enable_if_pointer_is_convertible< OtherPointer, @@ -121,7 +121,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::detail::enable_if_pointer_is_convertible< OtherPointer, pointer, @@ -132,7 +132,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); @@ -208,7 +208,7 @@ pointer malloc(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); @@ -261,7 +261,7 @@ pointer malloc(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair, typename thrust::pointer::difference_type> get_temporary_buffer(const thrust::detail::execution_policy_base &system, typename thrust::pointer::difference_type n); @@ -294,7 +294,7 @@ get_temporary_buffer(const thrust::detail::execution_policy_base * \endcode */ template -__host__ __device__ +THRUST_HOST_DEVICE void free(const thrust::detail::execution_policy_base &system, Pointer ptr); @@ -341,7 +341,7 @@ void free(const thrust::detail::execution_policy_base &system, Po * \see get_temporary_buffer */ template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(const thrust::detail::execution_policy_base &system, Pointer p, std::ptrdiff_t n); @@ -353,7 +353,7 @@ void return_temporary_buffer(const thrust::detail::execution_policy_base -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::pointer_traits::raw_pointer raw_pointer_cast(Pointer ptr); @@ -370,7 +370,7 @@ typename thrust::detail::pointer_traits::raw_pointer * \see raw_pointer_cast */ template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(T &ref); @@ -387,7 +387,7 @@ typename detail::raw_reference::type * \see 
raw_pointer_cast */ template -__host__ __device__ +THRUST_HOST_DEVICE typename detail::raw_reference::type raw_reference_cast(const T &ref); diff --git a/thrust/merge.h b/thrust/merge.h index 724f4c167..b308ae532 100644 --- a/thrust/merge.h +++ b/thrust/merge.h @@ -97,7 +97,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -233,7 +233,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -392,7 +392,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, @@ -573,7 +573,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, diff --git a/thrust/mismatch.h b/thrust/mismatch.h index bbdf2923a..d69953b38 100644 --- a/thrust/mismatch.h +++ b/thrust/mismatch.h @@ -85,7 +85,7 @@ THRUST_NAMESPACE_BEGIN * \see find_if */ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -191,7 +191,7 @@ thrust::pair mismatch(InputIterator1 first1, * \see find_if */ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/mr/allocator.h b/thrust/mr/allocator.h index 70653e1ab..4f2194b31 100644 --- a/thrust/mr/allocator.h +++ b/thrust/mr/allocator.h @@ -92,8 +92,8 @@ class allocator : private validator * * \return the maximum value of \p std::size_t, divided by the size of \p T. 
*/ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); @@ -103,14 +103,14 @@ class allocator : private validator * * \param resource the resource to be used to allocate raw memory. */ - __host__ __device__ + THRUST_HOST_DEVICE allocator(MR * resource) : mem_res(resource) { } /*! Copy constructor. Copies the resource pointer. */ template - __host__ __device__ + THRUST_HOST_DEVICE allocator(const allocator & other) : mem_res(other.resource()) { } @@ -121,7 +121,7 @@ class allocator : private validator * \return a pointer to the newly allocated storage. */ THRUST_NODISCARD - __host__ + THRUST_HOST pointer allocate(size_type n) { return static_cast(mem_res->do_allocate(n * sizeof(T), THRUST_ALIGNOF(T))); @@ -132,7 +132,7 @@ class allocator : private validator * \param p pointer returned by a previous call to \p allocate * \param n number of elements, passed as an argument to the \p allocate call that produced \p p */ - __host__ + THRUST_HOST void deallocate(pointer p, size_type n) { return mem_res->do_deallocate(p, n * sizeof(T), THRUST_ALIGNOF(T)); @@ -142,7 +142,7 @@ class allocator : private validator * * \return the memory resource used by this allocator. */ - __host__ __device__ + THRUST_HOST_DEVICE MR * resource() const { return mem_res; @@ -154,7 +154,7 @@ class allocator : private validator /*! Compares the allocators for equality by comparing the underlying memory resources. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const allocator & lhs, const allocator & rhs) noexcept { return *lhs.resource() == *rhs.resource(); @@ -162,7 +162,7 @@ bool operator==(const allocator & lhs, const allocator & rhs) noex /*! Compares the allocators for inequality by comparing the underlying memory resources. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const allocator & lhs, const allocator & rhs) noexcept { return !(lhs == rhs); @@ -231,20 +231,20 @@ class stateless_resource_allocator : public thrust::mr::allocator /*! Default constructor. Uses \p get_global_resource to get the global instance of \p Upstream and initializes the * \p allocator base subobject with that resource. */ - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE stateless_resource_allocator() : base(get_global_resource()) { } /*! Copy constructor. Copies the memory resource pointer. */ - __host__ __device__ + THRUST_HOST_DEVICE stateless_resource_allocator(const stateless_resource_allocator & other) : base(other) {} /*! Conversion constructor from an allocator of a different type. Copies the memory resource pointer. */ template - __host__ __device__ + THRUST_HOST_DEVICE stateless_resource_allocator(const stateless_resource_allocator & other) : base(other) {} @@ -254,7 +254,7 @@ class stateless_resource_allocator : public thrust::mr::allocator #endif /*! Destructor. 
*/ - __host__ __device__ + THRUST_HOST_DEVICE ~stateless_resource_allocator() {} }; diff --git a/thrust/mr/disjoint_pool.h b/thrust/mr/disjoint_pool.h index 52b5a297d..9a14b79d3 100644 --- a/thrust/mr/disjoint_pool.h +++ b/thrust/mr/disjoint_pool.h @@ -174,13 +174,13 @@ class disjoint_unsynchronized_pool_resource final std::size_t alignment; void_ptr pointer; - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const oversized_block_descriptor & other) const { return size == other.size && alignment == other.alignment && pointer == other.pointer; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator<(const oversized_block_descriptor & other) const { return size < other.size || (size == other.size && alignment < other.alignment); @@ -190,12 +190,12 @@ class disjoint_unsynchronized_pool_resource final struct equal_pointers { public: - __host__ __device__ + THRUST_HOST_DEVICE equal_pointers(void_ptr p) : p(p) { } - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const oversized_block_descriptor & desc) const { return desc.pointer == p; @@ -208,12 +208,12 @@ class disjoint_unsynchronized_pool_resource final struct matching_alignment { public: - __host__ __device__ + THRUST_HOST_DEVICE matching_alignment(std::size_t requested) : requested(requested) { } - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const oversized_block_descriptor & desc) const { return desc.alignment >= requested; @@ -235,14 +235,14 @@ class disjoint_unsynchronized_pool_resource final struct pool { - __host__ + THRUST_HOST pool(const pointer_vector & free) : free_blocks(free), previous_allocated_count(0) { } - __host__ + THRUST_HOST pool(const pool & other) : free_blocks(other.free_blocks), previous_allocated_count(other.previous_allocated_count) @@ -253,7 +253,7 @@ class disjoint_unsynchronized_pool_resource final pool & operator=(const pool &) = default; #endif - __host__ + THRUST_HOST ~pool() {} pointer_vector free_blocks; diff --git a/thrust/mr/disjoint_tls_pool.h 
b/thrust/mr/disjoint_tls_pool.h index a5bdb0605..3d475eb07 100644 --- a/thrust/mr/disjoint_tls_pool.h +++ b/thrust/mr/disjoint_tls_pool.h @@ -46,7 +46,7 @@ namespace mr * \param bookkeeper the second argument to the constructor, if invoked */ template -__host__ +THRUST_HOST thrust::mr::disjoint_unsynchronized_pool_resource & tls_disjoint_pool( Upstream * upstream = NULL, Bookkeeper * bookkeeper = NULL) diff --git a/thrust/mr/memory_resource.h b/thrust/mr/memory_resource.h index b2062cdfd..fb5c10610 100644 --- a/thrust/mr/memory_resource.h +++ b/thrust/mr/memory_resource.h @@ -88,7 +88,7 @@ class memory_resource * \param other the other resource to compare this resource to * \return whether the two resources are equivalent. */ - __host__ __device__ + THRUST_HOST_DEVICE bool is_equal(const memory_resource & other) const noexcept { return do_is_equal(other); @@ -119,7 +119,7 @@ class memory_resource * \param other the other resource to compare this resource to * \return whether the two resources are equivalent. */ - __host__ __device__ + THRUST_HOST_DEVICE virtual bool do_is_equal(const memory_resource & other) const noexcept { return this == &other; @@ -152,7 +152,7 @@ class memory_resource do_deallocate(p, bytes, alignment); } - __host__ __device__ + THRUST_HOST_DEVICE bool is_equal(const memory_resource & other) const noexcept { return do_is_equal(other); @@ -160,7 +160,7 @@ class memory_resource virtual pointer do_allocate(std::size_t bytes, std::size_t alignment) = 0; virtual void do_deallocate(pointer p, std::size_t bytes, std::size_t alignment) = 0; - __host__ __device__ + THRUST_HOST_DEVICE virtual bool do_is_equal(const memory_resource & other) const noexcept { return this == &other; @@ -188,7 +188,7 @@ class memory_resource /*! Compares the memory resources for equality, first by identity, then by \p is_equal. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const memory_resource & lhs, const memory_resource & rhs) noexcept { return &lhs == &rhs || rhs.is_equal(rhs); @@ -197,7 +197,7 @@ bool operator==(const memory_resource & lhs, const memory_resource -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const memory_resource & lhs, const memory_resource & rhs) noexcept { return !(lhs == rhs); @@ -209,7 +209,7 @@ bool operator!=(const memory_resource & lhs, const memory_resource -__host__ +THRUST_HOST MR * get_global_resource() { static MR resource; diff --git a/thrust/mr/polymorphic_adaptor.h b/thrust/mr/polymorphic_adaptor.h index a50e3df2c..4ef2eaf35 100644 --- a/thrust/mr/polymorphic_adaptor.h +++ b/thrust/mr/polymorphic_adaptor.h @@ -71,7 +71,7 @@ class polymorphic_adaptor_resource final : public memory_resource /*! Compares this \p polymorphic_adaptor_resource with another \p memory_resource * to see if they are equal. */ - __host__ __device__ + THRUST_HOST_DEVICE virtual bool do_is_equal(const memory_resource & other) const noexcept override { return upstream_resource->is_equal(other); diff --git a/thrust/mr/tls_pool.h b/thrust/mr/tls_pool.h index ae44dbbf7..da677a860 100644 --- a/thrust/mr/tls_pool.h +++ b/thrust/mr/tls_pool.h @@ -43,7 +43,7 @@ namespace mr * \param upstream the argument to the constructor, if invoked */ template -__host__ +THRUST_HOST thrust::mr::unsynchronized_pool_resource & tls_pool(Upstream * upstream = NULL) { static thread_local auto adaptor = [&]{ diff --git a/thrust/optional.h b/thrust/optional.h index d2a48c547..68c638e1c 100644 --- a/thrust/optional.h +++ b/thrust/optional.h @@ -229,7 +229,8 @@ template struct is_const_or_const_ref : std::true_type{}; #endif // std::invoke from C++17 -__thrust_exec_check_disable__ +// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround +THRUST_EXEC_CHECK_DISABLE template ::value @@ -237,7 +238,7 @@ template >::value>, int = 0> -__host__ __device__ 
+THRUST_HOST_DEVICE constexpr auto invoke(Fn &&f, Args &&... args) noexcept(noexcept(std::mem_fn(f)(std::forward(args)...))) THRUST_TRAILING_RETURN(decltype(std::mem_fn(f)(std::forward(args)...))) @@ -245,10 +246,10 @@ constexpr auto invoke(Fn &&f, Args &&... args) return std::mem_fn(f)(std::forward(args)...); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template >::value>> -__host__ __device__ +THRUST_HOST_DEVICE constexpr auto invoke(Fn &&f, Args &&... args) noexcept(noexcept(std::forward(f)(std::forward(args)...))) THRUST_TRAILING_RETURN(decltype(std::forward(f)(std::forward(args)...))) @@ -406,19 +407,19 @@ struct is_nothrow_swappable // destructible. template ::value> struct optional_storage_base { - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional_storage_base() noexcept : m_dummy(), m_has_value(false) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional_storage_base(in_place_t, U &&... u) : m_value(std::forward(u)...), m_has_value(true) {} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE ~optional_storage_base() { if (m_has_value) { m_value.~T(); @@ -437,14 +438,14 @@ struct optional_storage_base { // This case is for when T is trivially destructible. template struct optional_storage_base { - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional_storage_base() noexcept : m_dummy(), m_has_value(false) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional_storage_base(in_place_t, U &&... 
u) : m_value(std::forward(u)...), m_has_value(true) {} @@ -464,24 +465,24 @@ template struct optional_storage_base { template struct optional_operations_base : optional_storage_base { using optional_storage_base::optional_storage_base; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void hard_reset() noexcept { get().~T(); this->m_has_value = false; } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE void construct(Args &&... args) noexcept { new (thrust::addressof(this->m_value)) T(std::forward(args)...); this->m_has_value = true; } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE void assign(Opt &&rhs) { if (this->has_value()) { if (rhs.has_value()) { @@ -497,22 +498,22 @@ template struct optional_operations_base : optional_storage_base { } } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE bool has_value() const { return this->m_has_value; } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T &get() & { return this->m_value; } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR const T &get() const & { return this->m_value; } - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T &&get() && { return std::move(this->m_value); } #ifndef THRUST_OPTIONAL_NO_CONSTRR - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T &&get() const && { return std::move(this->m_value); } #endif }; @@ -529,10 +530,10 @@ template struct optional_copy_base : optional_operations_base { using optional_operations_base::optional_operations_base; - 
__thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_base() = default; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional_copy_base(const optional_copy_base &rhs) { if (rhs.has_value()) { this->construct(rhs.get()); @@ -541,11 +542,11 @@ struct optional_copy_base : optional_operations_base { } } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_base(optional_copy_base &&rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_base &operator=(const optional_copy_base &rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_base &operator=(optional_copy_base &&rhs) = default; }; @@ -556,13 +557,13 @@ struct optional_move_base : optional_copy_base { template struct optional_move_base : optional_copy_base { using optional_copy_base::optional_copy_base; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_base(const optional_move_base &rhs) = default; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional_move_base(optional_move_base &&rhs) noexcept( std::is_nothrow_move_constructible::value) { if (rhs.has_value()) { @@ -571,9 +572,9 @@ template struct optional_move_base : optional_copy_base { this->m_has_value = false; } } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_base &operator=(const optional_move_base &rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_base &operator=(optional_move_base &&rhs) = default; }; @@ -589,20 +590,20 @@ template struct optional_copy_assign_base : optional_move_base { using optional_move_base::optional_move_base; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_assign_base() = default; - 
__thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_assign_base(const optional_copy_assign_base &rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_assign_base(optional_copy_assign_base &&rhs) = default; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional_copy_assign_base &operator=(const optional_copy_assign_base &rhs) { this->assign(rhs); return *this; } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_copy_assign_base & operator=(optional_copy_assign_base &&rhs) = default; }; @@ -619,20 +620,20 @@ template struct optional_move_assign_base : optional_copy_assign_base { using optional_copy_assign_base::optional_copy_assign_base; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_assign_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_assign_base(const optional_move_assign_base &rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_assign_base(optional_move_assign_base &&rhs) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_move_assign_base & operator=(const optional_move_assign_base &rhs) = default; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional_move_assign_base & operator=(optional_move_assign_base &&rhs) noexcept( std::is_nothrow_move_constructible::value @@ -647,61 +648,61 @@ struct optional_move_assign_base : optional_copy_assign_base { template ::value, bool EnableMove = std::is_move_constructible::value> struct optional_delete_ctor_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE 
optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(optional_delete_ctor_base &&) noexcept = default; }; template struct optional_delete_ctor_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(optional_delete_ctor_base &&) noexcept = default; }; template struct optional_delete_ctor_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(const optional_delete_ctor_base &) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(optional_delete_ctor_base &&) noexcept = default; }; template struct optional_delete_ctor_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE 
optional_delete_ctor_base(const optional_delete_ctor_base &) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(const optional_delete_ctor_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_ctor_base & operator=(optional_delete_ctor_base &&) noexcept = default; }; @@ -714,65 +715,65 @@ template ::value && std::is_move_assignable::value)> struct optional_delete_assign_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(optional_delete_assign_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(optional_delete_assign_base &&) noexcept = default; }; template struct optional_delete_assign_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(optional_delete_assign_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(optional_delete_assign_base &&) 
noexcept = delete; }; template struct optional_delete_assign_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(optional_delete_assign_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(const optional_delete_assign_base &) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(optional_delete_assign_base &&) noexcept = default; }; template struct optional_delete_assign_base { - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base() = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(const optional_delete_assign_base &) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base(optional_delete_assign_base &&) noexcept = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(const optional_delete_assign_base &) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional_delete_assign_base & operator=(optional_delete_assign_base &&) noexcept = delete; }; @@ -782,7 +783,7 @@ template struct optional_delete_assign_base { /// \brief A tag type to represent an empty optional struct nullopt_t { struct do_not_use {}; - __host__ __device__ + THRUST_HOST_DEVICE constexpr explicit nullopt_t(do_not_use, do_not_use) noexcept {} }; /// \brief Represents an empty optional @@ -800,7 +801,7 @@ static constexpr nullopt_t nullopt{nullopt_t::do_not_use{}, class bad_optional_access : public std::exception { public: bad_optional_access() = default; - __host__ + THRUST_HOST const char *what() 
const noexcept { return "Optional has no value"; } }; @@ -838,9 +839,9 @@ class optional : private detail::optional_move_assign_base, /// is returned. /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto and_then(F &&f) & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -852,9 +853,9 @@ class optional : private detail::optional_move_assign_base, /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto and_then(F &&f) && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -866,9 +867,9 @@ class optional : private detail::optional_move_assign_base, /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto and_then(F &&f) const & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -881,9 +882,9 @@ class optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto and_then(F &&f) const && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -904,9 +905,9 @@ class optional : private detail::optional_move_assign_base, /// `std::invoke(std::forward(f), value())` is returned. 
/// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR detail::invoke_result_t and_then(F &&f) & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -918,9 +919,9 @@ class optional : private detail::optional_move_assign_base, /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR detail::invoke_result_t and_then(F &&f) && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -932,9 +933,9 @@ class optional : private detail::optional_move_assign_base, /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr detail::invoke_result_t and_then(F &&f) const & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -947,9 +948,9 @@ class optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr detail::invoke_result_t and_then(F &&f) const && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -972,36 +973,36 @@ class optional : private detail::optional_move_assign_base, /// /// \group map /// \synopsis template constexpr auto map(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto map(F &&f) & { return 
optional_map_impl(*this, std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto map(F &&f) && { return optional_map_impl(std::move(*this), std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) const&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto map(F &&f) const & { return optional_map_impl(*this, std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) const&&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto map(F &&f) const && { return optional_map_impl(std::move(*this), std::forward(f)); } @@ -1015,9 +1016,9 @@ class optional : private detail::optional_move_assign_base, /// /// \group map /// \synopsis template auto map(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR decltype(optional_map_impl(std::declval(), std::declval())) map(F &&f) & { @@ -1026,9 +1027,9 @@ class optional : private detail::optional_move_assign_base, /// \group map /// \synopsis template auto map(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR decltype(optional_map_impl(std::declval(), std::declval())) map(F &&f) && { @@ -1037,9 +1038,9 @@ class optional : private detail::optional_move_assign_base, /// \group map /// \synopsis template auto map(F &&f) const&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr decltype(optional_map_impl(std::declval(), std::declval())) map(F &&f) const & { @@ -1049,9 +1050,9 @@ class 
optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group map /// \synopsis template auto map(F &&f) const&&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr decltype(optional_map_impl(std::declval(), std::declval())) map(F &&f) const && { @@ -1069,9 +1070,9 @@ class optional : private detail::optional_move_assign_base, /// /// \group or_else /// \synopsis template optional or_else (F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) & { if (has_value()) return *this; @@ -1081,18 +1082,18 @@ class optional : private detail::optional_move_assign_base, } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) & { return has_value() ? *this : std::forward(f)(); } /// \group or_else /// \synopsis template optional or_else (F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) && { if (has_value()) return std::move(*this); @@ -1102,18 +1103,18 @@ class optional : private detail::optional_move_assign_base, } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) && { return has_value() ? 
std::move(*this) : std::forward(f)(); } /// \group or_else /// \synopsis template optional or_else (F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const & { if (has_value()) return *this; @@ -1123,18 +1124,18 @@ class optional : private detail::optional_move_assign_base, } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) const & { return has_value() ? *this : std::forward(f)(); } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const && { if (has_value()) return std::move(*this); @@ -1144,9 +1145,9 @@ class optional : private detail::optional_move_assign_base, } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const && { return has_value() ? std::move(*this) : std::forward(f)(); } @@ -1159,27 +1160,27 @@ class optional : private detail::optional_move_assign_base, /// and the value is returned. Otherwise `u` is returned. /// /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u); } /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) && { return has_value() ? 
detail::invoke(std::forward(f), std::move(**this)) : std::forward(u); } /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) const & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u); @@ -1187,9 +1188,9 @@ class optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) const && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u); @@ -1205,9 +1206,9 @@ class optional : private detail::optional_move_assign_base, /// /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u)(); @@ -1216,9 +1217,9 @@ class optional : private detail::optional_move_assign_base, /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u)(); @@ -1227,9 +1228,9 @@ class optional : private detail::optional_move_assign_base, /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) const & { return has_value() ? 
detail::invoke(std::forward(f), **this) : std::forward(u)(); @@ -1239,9 +1240,9 @@ class optional : private detail::optional_move_assign_base, /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) const && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u)(); @@ -1249,9 +1250,9 @@ class optional : private detail::optional_move_assign_base, #endif /// \return `u` if `*this` has a value, otherwise an empty optional. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr optional::type> conjunction(U &&u) const { using result = optional>; return has_value() ? result{u} : result{nullopt}; @@ -1259,60 +1260,60 @@ class optional : private detail::optional_move_assign_base, /// \return `rhs` if `*this` is empty, otherwise the current value. /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(const optional &rhs) & { return has_value() ? *this : rhs; } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(const optional &rhs) const & { return has_value() ? *this : rhs; } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(const optional &rhs) && { return has_value() ? 
std::move(*this) : rhs; } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(const optional &rhs) const && { return has_value() ? std::move(*this) : rhs; } #endif /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(optional &&rhs) & { return has_value() ? *this : std::move(rhs); } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(optional &&rhs) const & { return has_value() ? *this : std::move(rhs); } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(optional &&rhs) && { return has_value() ? std::move(*this) : std::move(rhs); } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(optional &&rhs) const && { return has_value() ? 
std::move(*this) : std::move(rhs); } @@ -1320,8 +1321,8 @@ class optional : private detail::optional_move_assign_base, /// Takes the value out of the optional, leaving it empty /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() & { optional ret = *this; reset(); @@ -1329,8 +1330,8 @@ class optional : private detail::optional_move_assign_base, } /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() const & { optional ret = *this; reset(); @@ -1338,8 +1339,8 @@ class optional : private detail::optional_move_assign_base, } /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() && { optional ret = std::move(*this); reset(); @@ -1348,8 +1349,8 @@ class optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() const && { optional ret = std::move(*this); reset(); @@ -1361,34 +1362,34 @@ class optional : private detail::optional_move_assign_base, /// Constructs an optional that does not contain a value. /// \group ctor_empty - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE constexpr optional() noexcept = default; /// \group ctor_empty - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional(nullopt_t) noexcept {} /// Copy constructor /// /// If `rhs` contains a value, the stored value is direct-initialized with /// it. Otherwise, the constructed optional is empty. 
- __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE THRUST_OPTIONAL_CPP11_CONSTEXPR optional(const optional &rhs) = default; /// Move constructor /// /// If `rhs` contains a value, the stored value is direct-initialized with /// it. Otherwise, the constructed optional is empty. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE THRUST_OPTIONAL_CPP11_CONSTEXPR optional(optional &&rhs) = default; /// Constructs the stored value in-place using the given arguments. /// \group in_place /// \synopsis template constexpr explicit optional(in_place_t, Args&&... args); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr explicit optional( detail::enable_if_t::value, in_place_t>, Args &&... args) @@ -1396,9 +1397,9 @@ class optional : private detail::optional_move_assign_base, /// \group in_place /// \synopsis template \nconstexpr explicit optional(in_place_t, std::initializer_list&, Args&&... args); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR explicit optional( detail::enable_if_t &, Args &&...>::value, @@ -1409,74 +1410,74 @@ class optional : private detail::optional_move_assign_base, /// Constructs the stored value with `u`. /// \synopsis template constexpr optional(U &&u); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template < class U = T, detail::enable_if_t::value> * = nullptr, detail::enable_forward_value * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE constexpr optional(U &&u) : base(in_place, std::forward(u)) {} /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template < class U = T, detail::enable_if_t::value> * = nullptr, detail::enable_forward_value * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE constexpr explicit optional(U &&u) : base(in_place, std::forward(u)) {} /// Converting copy constructor. 
/// \synopsis template optional(const optional &rhs); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template < class U, detail::enable_from_other * = nullptr, detail::enable_if_t::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional(const optional &rhs) { this->construct(*rhs); } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr, detail::enable_if_t::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE explicit optional(const optional &rhs) { this->construct(*rhs); } /// Converting move constructor. /// \synopsis template optional(optional &&rhs); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template < class U, detail::enable_from_other * = nullptr, detail::enable_if_t::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional(optional &&rhs) { this->construct(std::move(*rhs)); } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template < class U, detail::enable_from_other * = nullptr, detail::enable_if_t::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE explicit optional(optional &&rhs) { this->construct(std::move(*rhs)); } /// Destroys the stored value if there is one. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE ~optional() = default; /// Assignment to empty. /// /// Destroys the current value if there is one. - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional &operator=(nullopt_t) noexcept { if (has_value()) { this->m_value.~T(); @@ -1490,22 +1491,22 @@ class optional : private detail::optional_move_assign_base, /// /// Copies the value from `rhs` if there is one. Otherwise resets the stored /// value in `*this`. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional &operator=(const optional &rhs) = default; /// Move assignment. /// /// Moves the value from `rhs` if there is one. 
Otherwise resets the stored /// value in `*this`. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional &operator=(optional &&rhs) = default; /// Assigns the stored value from `u`, destroying the old value if there was /// one. /// \synopsis optional &operator=(U &&u); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional &operator=(U &&u) { if (has_value()) { this->m_value = std::forward(u); @@ -1521,10 +1522,10 @@ class optional : private detail::optional_move_assign_base, /// Copies the value from `rhs` if there is one. Otherwise resets the stored /// value in `*this`. /// \synopsis optional &operator=(const optional & rhs); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional &operator=(const optional &rhs) { if (has_value()) { if (rhs.has_value()) { @@ -1547,9 +1548,9 @@ class optional : private detail::optional_move_assign_base, /// Moves the value from `rhs` if there is one. Otherwise resets the stored /// value in `*this`. /// \synopsis optional &operator=(optional && rhs); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional &operator=(optional &&rhs) { if (has_value()) { if (rhs.has_value()) { @@ -1569,9 +1570,9 @@ class optional : private detail::optional_move_assign_base, /// Constructs the value in-place, destroying the current one if there is /// one. /// \group emplace - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE T &emplace(Args &&... args) { static_assert(std::is_constructible::value, "T must be constructible with Args"); @@ -1583,9 +1584,9 @@ class optional : private detail::optional_move_assign_base, /// \group emplace /// \synopsis template \nT& emplace(std::initializer_list il, Args &&... 
args); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::enable_if_t< std::is_constructible &, Args &&...>::value, T &> @@ -1601,8 +1602,8 @@ class optional : private detail::optional_move_assign_base, /// If both have a value, the values are swapped. /// If one has a value, it is moved to the other and the movee is left /// valueless. - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void swap(optional &rhs) noexcept(std::is_nothrow_move_constructible::value &&detail::is_nothrow_swappable::value) { @@ -1624,16 +1625,16 @@ class optional : private detail::optional_move_assign_base, /// \requires a value is stored /// \group pointer /// \synopsis constexpr const T *operator->() const; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T *operator->() const { return addressof(this->m_value); } /// \group pointer /// \synopsis constexpr T *operator->(); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T *operator->() { return addressof(this->m_value); } @@ -1642,39 +1643,39 @@ class optional : private detail::optional_move_assign_base, /// \requires a value is stored /// \group deref /// \synopsis constexpr T &operator*(); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T &operator*() & { return this->m_value; } /// \group deref /// \synopsis constexpr const T &operator*() const; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T &operator*() const & { return this->m_value; } /// \exclude - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T &&operator*() && { 
return std::move(this->m_value); } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \exclude - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T &&operator*() const && { return std::move(this->m_value); } #endif /// \return whether or not the optional has a value /// \group has_value - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool has_value() const noexcept { return this->m_has_value; } /// \group has_value - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr explicit operator bool() const noexcept { return this->m_has_value; } @@ -1683,7 +1684,7 @@ class optional : private detail::optional_move_assign_base, /// [bad_optional_access] /// \group value /// \synopsis constexpr T &value(); - __host__ + THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR T &value() & { if (has_value()) return this->m_value; @@ -1691,14 +1692,14 @@ class optional : private detail::optional_move_assign_base, } /// \group value /// \synopsis constexpr const T &value() const; - __host__ + THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR const T &value() const & { if (has_value()) return this->m_value; throw bad_optional_access(); } /// \exclude - __host__ + THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR T &&value() && { if (has_value()) return std::move(this->m_value); @@ -1707,7 +1708,7 @@ class optional : private detail::optional_move_assign_base, #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \exclude - __host__ + THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR const T &&value() const && { if (has_value()) return std::move(this->m_value); @@ -1717,9 +1718,9 @@ class optional : private detail::optional_move_assign_base, /// \return the stored value if there is one, otherwise returns `u` /// \group value_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr T 
value_or(U &&u) const & { static_assert(std::is_copy_constructible::value && std::is_convertible::value, @@ -1728,9 +1729,9 @@ class optional : private detail::optional_move_assign_base, } /// \group value_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T value_or(U &&u) && { static_assert(std::is_move_constructible::value && std::is_convertible::value, @@ -1739,8 +1740,8 @@ class optional : private detail::optional_move_assign_base, } /// Destroys the stored value if one exists, making the optional empty - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void reset() noexcept { if (has_value()) { this->m_value.~T(); @@ -1755,51 +1756,51 @@ class optional : private detail::optional_move_assign_base, /// relational operators. Otherwise `lhs` and `rhs` are equal only if they are /// both empty, and `lhs` is less than `rhs` only if `rhs` is empty and `lhs` /// is not. 
-__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator==(const optional &lhs, const optional &rhs) { return lhs.has_value() == rhs.has_value() && (!lhs.has_value() || *lhs == *rhs); } /// \group relop -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator!=(const optional &lhs, const optional &rhs) { return lhs.has_value() != rhs.has_value() || (lhs.has_value() && *lhs != *rhs); } /// \group relop -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<(const optional &lhs, const optional &rhs) { return rhs.has_value() && (!lhs.has_value() || *lhs < *rhs); } /// \group relop -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>(const optional &lhs, const optional &rhs) { return lhs.has_value() && (!rhs.has_value() || *lhs > *rhs); } /// \group relop -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<=(const optional &lhs, const optional &rhs) { return !lhs.has_value() || (rhs.has_value() && *lhs <= *rhs); } /// \group relop -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>=(const optional &lhs, const optional &rhs) { return !rhs.has_value() || (lhs.has_value() && *lhs >= *rhs); @@ -1808,86 +1809,86 @@ inline constexpr bool operator>=(const optional &lhs, /// \group relop_nullopt /// \brief Compares an optional to a `nullopt` /// \details Equivalent to comparing the optional to an empty optional -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator==(const optional &lhs, 
nullopt_t) noexcept { return !lhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator==(nullopt_t, const optional &rhs) noexcept { return !rhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator!=(const optional &lhs, nullopt_t) noexcept { return lhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator!=(nullopt_t, const optional &rhs) noexcept { return rhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<(const optional &, nullopt_t) noexcept { return false; } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<(nullopt_t, const optional &rhs) noexcept { return rhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<=(const optional &lhs, nullopt_t) noexcept { return !lhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<=(nullopt_t, const optional &) noexcept { return true; } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>(const optional &lhs, nullopt_t) noexcept { return lhs.has_value(); } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template 
-__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>(nullopt_t, const optional &) noexcept { return false; } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>=(const optional &, nullopt_t) noexcept { return true; } /// \group relop_nullopt -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>=(nullopt_t, const optional &rhs) noexcept { return !rhs.has_value(); } @@ -1897,96 +1898,96 @@ inline constexpr bool operator>=(nullopt_t, const optional &rhs) noexcept { /// \details If the optional has a value, it is compared with the other value /// using `T`s relational operators. Otherwise, the optional is considered /// less than the value. -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator==(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs == rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator==(const U &lhs, const optional &rhs) { return rhs.has_value() ? lhs == *rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator!=(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs != rhs : true; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator!=(const U &lhs, const optional &rhs) { return rhs.has_value() ? 
lhs != *rhs : true; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs < rhs : true; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<(const U &lhs, const optional &rhs) { return rhs.has_value() ? lhs < *rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<=(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs <= rhs : true; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator<=(const U &lhs, const optional &rhs) { return rhs.has_value() ? lhs <= *rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs > rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>(const U &lhs, const optional &rhs) { return rhs.has_value() ? lhs > *rhs : true; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>=(const optional &lhs, const U &rhs) { return lhs.has_value() ? *lhs >= rhs : false; } /// \group relop_t -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr bool operator>=(const U &lhs, const optional &rhs) { return rhs.has_value() ? 
lhs >= *rhs : true; } /// \synopsis template \nvoid swap(optional &lhs, optional &rhs); -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template ::value> * = nullptr, detail::enable_if_t::value> * = nullptr> -__host__ __device__ +THRUST_HOST_DEVICE void swap(optional &lhs, optional &rhs) noexcept(noexcept(lhs.swap(rhs))) { return lhs.swap(rhs); @@ -1996,25 +1997,25 @@ namespace detail { struct i_am_secret {}; } // namespace detail -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template ::value, detail::decay_t, T>> -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr optional make_optional(U &&v) { return optional(std::forward(v)); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr optional make_optional(Args &&... args) { return optional(in_place, std::forward(args)...); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE inline constexpr optional make_optional(std::initializer_list il, Args &&... args) { return optional(in_place, il, std::forward(args)...); @@ -2029,24 +2030,24 @@ template optional(T)->optional; /// \exclude namespace detail { #ifdef THRUST_OPTIONAL_CPP14 -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template (), *std::declval())), detail::enable_if_t::value> * = nullptr> -__host__ __device__ +THRUST_HOST_DEVICE constexpr auto optional_map_impl(Opt &&opt, F &&f) { return opt.has_value() ? 
detail::invoke(std::forward(f), *std::forward(opt)) : optional(nullopt); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template (), *std::declval())), detail::enable_if_t::value> * = nullptr> -__host__ __device__ +THRUST_HOST_DEVICE auto optional_map_impl(Opt &&opt, F &&f) { if (opt.has_value()) { detail::invoke(std::forward(f), *std::forward(opt)); @@ -2056,24 +2057,24 @@ auto optional_map_impl(Opt &&opt, F &&f) { return optional(nullopt); } #else -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template (), *std::declval())), detail::enable_if_t::value> * = nullptr> -__host__ __device__ +THRUST_HOST_DEVICE constexpr optional optional_map_impl(Opt &&opt, F &&f) { return opt.has_value() ? detail::invoke(std::forward(f), *std::forward(opt)) : optional(nullopt); } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template (), *std::declval())), detail::enable_if_t::value> * = nullptr> -__host__ __device__ +THRUST_HOST_DEVICE auto optional_map_impl(Opt &&opt, F &&f) -> optional { if (opt.has_value()) { @@ -2127,9 +2128,9 @@ template class optional { /// is returned. 
/// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto and_then(F &&f) & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2141,9 +2142,9 @@ template class optional { /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto and_then(F &&f) && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2155,9 +2156,9 @@ template class optional { /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto and_then(F &&f) const & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2170,9 +2171,9 @@ template class optional { #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto and_then(F &&f) const && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2193,9 +2194,9 @@ template class optional { /// is returned. 
/// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR detail::invoke_result_t and_then(F &&f) & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2207,9 +2208,9 @@ template class optional { /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR detail::invoke_result_t and_then(F &&f) && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2221,9 +2222,9 @@ template class optional { /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr detail::invoke_result_t and_then(F &&f) const & { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2236,9 +2237,9 @@ template class optional { #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group and_then /// \synopsis template \nconstexpr auto and_then(F &&f) const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr detail::invoke_result_t and_then(F &&f) const && { using result = detail::invoke_result_t; static_assert(detail::is_optional::value, @@ -2261,36 +2262,36 @@ template class optional { /// /// \group map /// \synopsis template constexpr auto map(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto map(F &&f) & { return detail::optional_map_impl(*this, std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) &&; - 
__thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR auto map(F &&f) && { return detail::optional_map_impl(std::move(*this), std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) const&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto map(F &&f) const & { return detail::optional_map_impl(*this, std::forward(f)); } /// \group map /// \synopsis template constexpr auto map(F &&f) const&&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr auto map(F &&f) const && { return detail::optional_map_impl(std::move(*this), std::forward(f)); } @@ -2304,9 +2305,9 @@ template class optional { /// /// \group map /// \synopsis template auto map(F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR decltype(detail::optional_map_impl(std::declval(), std::declval())) map(F &&f) & { @@ -2315,9 +2316,9 @@ template class optional { /// \group map /// \synopsis template auto map(F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR decltype(detail::optional_map_impl(std::declval(), std::declval())) map(F &&f) && { @@ -2326,9 +2327,9 @@ template class optional { /// \group map /// \synopsis template auto map(F &&f) const&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr decltype(detail::optional_map_impl(std::declval(), std::declval())) map(F &&f) const & { @@ -2338,9 +2339,9 @@ template class optional { #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group map /// \synopsis template auto map(F &&f) const&&; - __thrust_exec_check_disable__ + 
THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr decltype(detail::optional_map_impl(std::declval(), std::declval())) map(F &&f) const && { @@ -2357,9 +2358,9 @@ template class optional { /// /// \group or_else /// \synopsis template optional or_else (F &&f) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) & { if (has_value()) @@ -2370,9 +2371,9 @@ template class optional { } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) & { return has_value() ? *this : std::forward(f)(); @@ -2380,9 +2381,9 @@ template class optional { /// \group or_else /// \synopsis template optional or_else (F &&f) &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) && { if (has_value()) return std::move(*this); @@ -2392,18 +2393,18 @@ template class optional { } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) && { return has_value() ? std::move(*this) : std::forward(f)(); } /// \group or_else /// \synopsis template optional or_else (F &&f) const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const & { if (has_value()) return *this; @@ -2413,18 +2414,18 @@ template class optional { } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional THRUST_OPTIONAL_CPP11_CONSTEXPR or_else(F &&f) const & { return has_value() ? 
*this : std::forward(f)(); } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const && { if (has_value()) return std::move(*this); @@ -2434,9 +2435,9 @@ template class optional { } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional or_else(F &&f) const && { return has_value() ? std::move(*this) : std::forward(f)(); } @@ -2449,27 +2450,27 @@ template class optional { /// and the value is returned. Otherwise `u` is returned. /// /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u); } /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u); } /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) const & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u); @@ -2477,9 +2478,9 @@ template class optional { #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group map_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE U map_or(F &&f, U &&u) const && { return has_value() ? 
detail::invoke(std::forward(f), std::move(**this)) : std::forward(u); @@ -2495,9 +2496,9 @@ template class optional { /// /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u)(); @@ -2506,9 +2507,9 @@ template class optional { /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u)(); @@ -2517,9 +2518,9 @@ template class optional { /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// const &; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) const & { return has_value() ? detail::invoke(std::forward(f), **this) : std::forward(u)(); @@ -2529,9 +2530,9 @@ template class optional { /// \group map_or_else /// \synopsis template \nauto map_or_else(F &&f, U &&u) /// const &&; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE detail::invoke_result_t map_or_else(F &&f, U &&u) const && { return has_value() ? detail::invoke(std::forward(f), std::move(**this)) : std::forward(u)(); @@ -2539,9 +2540,9 @@ template class optional { #endif /// \return `u` if `*this` has a value, otherwise an empty optional. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr optional::type> conjunction(U &&u) const { using result = optional>; return has_value() ? 
result{u} : result{nullopt}; @@ -2549,60 +2550,60 @@ template class optional { /// \return `rhs` if `*this` is empty, otherwise the current value. /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(const optional &rhs) & { return has_value() ? *this : rhs; } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(const optional &rhs) const & { return has_value() ? *this : rhs; } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(const optional &rhs) && { return has_value() ? std::move(*this) : rhs; } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(const optional &rhs) const && { return has_value() ? std::move(*this) : rhs; } #endif /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(optional &&rhs) & { return has_value() ? *this : std::move(rhs); } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(optional &&rhs) const & { return has_value() ? *this : std::move(rhs); } /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR optional disjunction(optional &&rhs) && { return has_value() ? 
std::move(*this) : std::move(rhs); } #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group disjunction - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional disjunction(optional &&rhs) const && { return has_value() ? std::move(*this) : std::move(rhs); } @@ -2610,8 +2611,8 @@ template class optional { /// Takes the value out of the optional, leaving it empty /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() & { optional ret = *this; reset(); @@ -2619,8 +2620,8 @@ template class optional { } /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() const & { optional ret = *this; reset(); @@ -2628,8 +2629,8 @@ template class optional { } /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() && { optional ret = std::move(*this); reset(); @@ -2638,8 +2639,8 @@ template class optional { #ifndef THRUST_OPTIONAL_NO_CONSTRR /// \group take - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional take() const && { optional ret = std::move(*this); reset(); @@ -2651,55 +2652,55 @@ template class optional { /// Constructs an optional that does not contain a value. /// \group ctor_empty - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional() noexcept : m_value(nullptr) {} /// \group ctor_empty - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr optional(nullopt_t) noexcept : m_value(nullptr) {} /// Copy constructor /// /// If `rhs` contains a value, the stored value is direct-initialized with /// it. Otherwise, the constructed optional is empty. 
- __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE THRUST_OPTIONAL_CPP11_CONSTEXPR optional(const optional &rhs) noexcept = default; /// Move constructor /// /// If `rhs` contains a value, the stored value is direct-initialized with /// it. Otherwise, the constructed optional is empty. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE THRUST_OPTIONAL_CPP11_CONSTEXPR optional(optional &&rhs) = default; /// Constructs the stored value with `u`. /// \synopsis template constexpr optional(U &&u); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template >::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE constexpr optional(U &&u) : m_value(addressof(u)) { static_assert(std::is_lvalue_reference::value, "U must be an lvalue"); } /// \exclude - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr explicit optional(const optional &rhs) : optional(*rhs) {} /// No-op - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE ~optional() = default; /// Assignment to empty. /// /// Destroys the current value if there is one. - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE optional &operator=(nullopt_t) noexcept { m_value = nullptr; return *this; @@ -2709,18 +2710,18 @@ template class optional { /// /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise /// resets the stored value in `*this`. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE optional &operator=(const optional &rhs) = default; /// Rebinds this optional to `u`. /// /// \requires `U` must be an lvalue reference. 
/// \synopsis optional &operator=(U &&u); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template >::value> * = nullptr> - __host__ __device__ + THRUST_HOST_DEVICE optional &operator=(U &&u) { static_assert(std::is_lvalue_reference::value, "U must be an lvalue"); m_value = addressof(u); @@ -2731,9 +2732,9 @@ template class optional { /// /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise /// resets the stored value in `*this`. - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE optional &operator=(const optional &rhs) { m_value = addressof(rhs.value()); return *this; @@ -2743,9 +2744,9 @@ template class optional { /// one. /// /// \group emplace - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE T &emplace(U& u) noexcept { m_value = addressof(u); return *m_value; @@ -2757,46 +2758,46 @@ template class optional { /// If both have a value, the values are swapped. /// If one has a value, it is moved to the other and the movee is left /// valueless. 
- __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE void swap(optional &rhs) noexcept { std::swap(m_value, rhs.m_value); } /// \return a pointer to the stored value /// \requires a value is stored /// \group pointer /// \synopsis constexpr const T *operator->() const; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T *operator->() const { return m_value; } /// \group pointer /// \synopsis constexpr T *operator->(); - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T *operator->() { return m_value; } /// \return the stored value /// \requires a value is stored /// \group deref /// \synopsis constexpr T &operator*(); - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE THRUST_OPTIONAL_CPP11_CONSTEXPR T &operator*() { return *m_value; } /// \group deref /// \synopsis constexpr const T &operator*() const; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr const T &operator*() const { return *m_value; } /// \return whether or not the optional has a value /// \group has_value - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr bool has_value() const noexcept { return m_value != nullptr; } /// \group has_value - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE constexpr explicit operator bool() const noexcept { return m_value != nullptr; } @@ -2805,7 +2806,7 @@ template class optional { /// [bad_optional_access] /// \group value /// synopsis constexpr T &value(); - __host__ + THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR T &value() { if (has_value()) return *m_value; @@ -2813,7 +2814,7 @@ template class optional { } /// \group value /// \synopsis constexpr const T &value() const; - __host__ + 
THRUST_HOST THRUST_OPTIONAL_CPP11_CONSTEXPR const T &value() const { if (has_value()) return *m_value; @@ -2822,9 +2823,9 @@ template class optional { /// \return the stored value if there is one, otherwise returns `u` /// \group value_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE constexpr T value_or(U &&u) const & { static_assert(std::is_copy_constructible::value && std::is_convertible::value, @@ -2833,9 +2834,9 @@ template class optional { } /// \group value_or - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - __host__ __device__ + THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T value_or(U &&u) && { static_assert(std::is_move_constructible::value && std::is_convertible::value, @@ -2844,7 +2845,7 @@ template class optional { } /// Destroys the stored value if one exists, making the optional empty - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE void reset() noexcept { m_value = nullptr; } private: @@ -2856,8 +2857,8 @@ THRUST_NAMESPACE_END namespace std { // TODO SFINAE template struct hash> { - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE ::std::size_t operator()(const THRUST_NS_QUALIFIER::optional &o) const { if (!o.has_value()) return 0; diff --git a/thrust/pair.h b/thrust/pair.h index 489226f4b..9bf03005d 100644 --- a/thrust/pair.h +++ b/thrust/pair.h @@ -129,14 +129,14 @@ template * and \p second using \c first_type & \c second_type's * default constructors, respectively. */ - __host__ __device__ pair(void); + THRUST_HOST_DEVICE pair(void); /*! This constructor accepts two objects to copy into this \p pair. * * \param x The object to copy into \p first. * \param y The object to copy into \p second. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE pair(const T1 &x, const T2 &y); /*! 
This copy constructor copies from a \p pair whose types are @@ -149,7 +149,7 @@ template * \tparam U2 is convertible to \c second_type. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE pair(const pair &p); /*! This copy constructor copies from a std::pair whose types are @@ -162,14 +162,14 @@ template * \tparam U2 is convertible to \c second_type. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE pair(const std::pair &p); /*! \p swap swaps the elements of two pairs. * * \param p The other pair with which to swap. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(pair &p); }; // end pair @@ -184,7 +184,7 @@ template * \tparam T2 is a model of Equality Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator==(const pair &x, const pair &y); @@ -198,7 +198,7 @@ template * \tparam T2 is a model of LessThan Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator<(const pair &x, const pair &y); @@ -212,7 +212,7 @@ template * \tparam T2 is a model of Equality Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator!=(const pair &x, const pair &y); @@ -226,7 +226,7 @@ template * \tparam T2 is a model of LessThan Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator>(const pair &x, const pair &y); @@ -240,7 +240,7 @@ template * \tparam T2 is a model of LessThan Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator<=(const pair &x, const pair &y); @@ -254,7 +254,7 @@ template * \tparam T2 is a model of LessThan Comparable. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE bool operator>=(const pair &x, const pair &y); @@ -264,7 +264,7 @@ template * \param y The second \p pair to swap. 
*/ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(pair &x, pair &y); @@ -278,7 +278,7 @@ template * \tparam T2 There are no requirements on the type of \p T2. */ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE pair make_pair(T1 x, T2 y); diff --git a/thrust/partition.h b/thrust/partition.h index f00a92fb5..2eee1ca06 100644 --- a/thrust/partition.h +++ b/thrust/partition.h @@ -73,7 +73,7 @@ THRUST_NAMESPACE_BEGIN * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -95,7 +95,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -134,7 +134,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -201,7 +201,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -269,7 +269,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -337,7 +337,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -414,7 +414,7 @@ __host__ __device__ * ... 
* struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -524,7 +524,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 first, @@ -648,7 +648,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -712,7 +712,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -781,7 +781,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -851,7 +851,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -925,7 +925,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -1004,7 +1004,7 @@ __host__ __device__ * ... 
* struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -1111,7 +1111,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 first, @@ -1237,7 +1237,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1289,7 +1289,7 @@ __host__ __device__ * * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -1350,7 +1350,7 @@ template * * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; @@ -1369,7 +1369,7 @@ template * \see \p partition */ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -1399,7 +1399,7 @@ __host__ __device__ * * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int &x) * { * return (x % 2) == 0; diff --git a/thrust/per_device_resource.h b/thrust/per_device_resource.h index a6d620f85..bae338075 100644 --- a/thrust/per_device_resource.h +++ b/thrust/per_device_resource.h @@ -37,7 +37,7 @@ THRUST_NAMESPACE_BEGIN * \return a pointer to a global instance of \p MR for the current device. */ template -__host__ +THRUST_HOST MR * get_per_device_resource(const thrust::detail::execution_policy_base & system) { using thrust::system::detail::generic::get_per_device_resource; @@ -76,24 +76,24 @@ class per_device_allocator : public thrust::mr::allocator /*! Default constructor. Uses \p get_global_resource to get the global instance of \p Upstream and initializes the * \p allocator base subobject with that resource. 
*/ - __host__ + THRUST_HOST per_device_allocator() : base(get_per_device_resource(ExecutionPolicy())) { } /*! Copy constructor. Copies the memory resource pointer. */ - __host__ __device__ + THRUST_HOST_DEVICE per_device_allocator(const per_device_allocator & other) : base(other) {} /*! Conversion constructor from an allocator of a different type. Copies the memory resource pointer. */ template - __host__ __device__ + THRUST_HOST_DEVICE per_device_allocator(const per_device_allocator & other) : base(other) {} /*! Destructor. */ - __host__ __device__ + THRUST_HOST_DEVICE ~per_device_allocator() {} }; diff --git a/thrust/random/detail/discard_block_engine.inl b/thrust/random/detail/discard_block_engine.inl index 31128e250..03260f12b 100644 --- a/thrust/random/detail/discard_block_engine.inl +++ b/thrust/random/detail/discard_block_engine.inl @@ -27,7 +27,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE discard_block_engine ::discard_block_engine() : m_e(), m_n(0) @@ -35,7 +35,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE discard_block_engine ::discard_block_engine(result_type s) : m_e(s), m_n(0) @@ -43,7 +43,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE discard_block_engine ::discard_block_engine(const base_type &urng) : m_e(urng), m_n(0) @@ -51,7 +51,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void discard_block_engine ::seed(void) { @@ -61,7 +61,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void discard_block_engine ::seed(result_type s) { @@ -71,7 +71,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename discard_block_engine::result_type discard_block_engine ::operator()(void) @@ -91,7 +91,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void discard_block_engine ::discard(unsigned long long z) { @@ -104,7 +104,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE const typename discard_block_engine::base_type 
& discard_block_engine ::base(void) const @@ -163,7 +163,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool discard_block_engine ::equal(const discard_block_engine &rhs) const { @@ -192,7 +192,7 @@ operator>>(std::basic_istream &is, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const discard_block_engine &lhs, const discard_block_engine &rhs) { @@ -201,7 +201,7 @@ bool operator==(const discard_block_engine &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const discard_block_engine &lhs, const discard_block_engine &rhs) { diff --git a/thrust/random/detail/linear_congruential_engine.inl b/thrust/random/detail/linear_congruential_engine.inl index fa9fd7d0d..335aa6a32 100644 --- a/thrust/random/detail/linear_congruential_engine.inl +++ b/thrust/random/detail/linear_congruential_engine.inl @@ -29,7 +29,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE linear_congruential_engine ::linear_congruential_engine(result_type s) { @@ -38,7 +38,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void linear_congruential_engine ::seed(result_type s) { @@ -51,7 +51,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename linear_congruential_engine::result_type linear_congruential_engine ::operator()(void) @@ -62,7 +62,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void linear_congruential_engine ::discard(unsigned long long z) { @@ -120,7 +120,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool linear_congruential_engine ::equal(const linear_congruential_engine &rhs) const { @@ -129,7 +129,7 @@ bool linear_congruential_engine template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const linear_congruential_engine &lhs, const linear_congruential_engine &rhs) { @@ -138,7 +138,7 @@ bool operator==(const linear_congruential_engine &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const 
linear_congruential_engine &lhs, const linear_congruential_engine &rhs) { diff --git a/thrust/random/detail/linear_congruential_engine_discard.h b/thrust/random/detail/linear_congruential_engine_discard.h index c8103d9dc..f1e3e5243 100644 --- a/thrust/random/detail/linear_congruential_engine_discard.h +++ b/thrust/random/detail/linear_congruential_engine_discard.h @@ -33,7 +33,7 @@ namespace detail template struct linear_congruential_engine_discard_implementation { - __host__ __device__ + THRUST_HOST_DEVICE static void discard(UIntType &state, unsigned long long z) { for(; z > 0; --z) @@ -49,7 +49,7 @@ template template struct linear_congruential_engine_discard_implementation { - __host__ __device__ + THRUST_HOST_DEVICE static void discard(thrust::detail::uint32_t &state, unsigned long long z) { const thrust::detail::uint32_t modulus = m; @@ -82,7 +82,7 @@ template struct linear_congruential_engine_discard { template - __host__ __device__ + THRUST_HOST_DEVICE static void discard(LinearCongruentialEngine &lcg, unsigned long long z) { typedef typename LinearCongruentialEngine::result_type result_type; diff --git a/thrust/random/detail/linear_feedback_shift_engine.inl b/thrust/random/detail/linear_feedback_shift_engine.inl index ac3ca8673..41a859d2c 100644 --- a/thrust/random/detail/linear_feedback_shift_engine.inl +++ b/thrust/random/detail/linear_feedback_shift_engine.inl @@ -26,7 +26,7 @@ namespace random { template - __host__ __device__ + THRUST_HOST_DEVICE linear_feedback_shift_engine ::linear_feedback_shift_engine(result_type value) { @@ -34,7 +34,7 @@ template } // end linear_feedback_shift_engine::linear_feedback_shift_engine() template - __host__ __device__ + THRUST_HOST_DEVICE void linear_feedback_shift_engine ::seed(result_type value) { @@ -42,7 +42,7 @@ template } // end linear_feedback_shift_engine::seed() template - __host__ __device__ + THRUST_HOST_DEVICE typename linear_feedback_shift_engine::result_type linear_feedback_shift_engine ::operator()(void) 
@@ -55,7 +55,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void linear_feedback_shift_engine ::discard(unsigned long long z) { @@ -116,7 +116,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool linear_feedback_shift_engine ::equal(const linear_feedback_shift_engine &rhs) const { @@ -125,7 +125,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const linear_feedback_shift_engine &lhs, const linear_feedback_shift_engine &rhs) { @@ -134,7 +134,7 @@ bool operator==(const linear_feedback_shift_engine &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const linear_feedback_shift_engine &lhs, const linear_feedback_shift_engine &rhs) { diff --git a/thrust/random/detail/mod.h b/thrust/random/detail/mod.h index f0637582d..4b9cf9da5 100644 --- a/thrust/random/detail/mod.h +++ b/thrust/random/detail/mod.h @@ -32,7 +32,7 @@ template static const T q = m / a; static const T r = m % a; - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T x) const { THRUST_IF_CONSTEXPR(a == 1) @@ -75,7 +75,7 @@ template template struct static_mod { - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T x) const { return a * x + c; @@ -83,7 +83,7 @@ template }; // end static_mod template -__host__ __device__ +THRUST_HOST_DEVICE T mod(T x) { static_mod f; diff --git a/thrust/random/detail/normal_distribution.inl b/thrust/random/detail/normal_distribution.inl index d08b96efd..6b0d852c2 100644 --- a/thrust/random/detail/normal_distribution.inl +++ b/thrust/random/detail/normal_distribution.inl @@ -37,7 +37,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE normal_distribution ::normal_distribution(RealType a, RealType b) :super_t(),m_param(a,b) @@ -46,7 +46,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE normal_distribution ::normal_distribution(const param_type &parm) :super_t(),m_param(parm) @@ -55,7 +55,7 @@ template template - __host__ __device__ + 
THRUST_HOST_DEVICE void normal_distribution ::reset(void) { @@ -65,7 +65,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::operator()(UniformRandomNumberGenerator &urng) @@ -76,7 +76,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::operator()(UniformRandomNumberGenerator &urng, @@ -87,7 +87,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::param_type normal_distribution ::param(void) const @@ -97,7 +97,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void normal_distribution ::param(const param_type &parm) { @@ -106,7 +106,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -116,7 +116,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -137,7 +137,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::mean(void) const @@ -147,7 +147,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename normal_distribution::result_type normal_distribution ::stddev(void) const @@ -157,7 +157,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool normal_distribution ::equal(const normal_distribution &rhs) const { @@ -214,7 +214,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const normal_distribution &lhs, const normal_distribution &rhs) { @@ -223,7 +223,7 @@ bool operator==(const normal_distribution &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const normal_distribution &lhs, const normal_distribution &rhs) { diff --git 
a/thrust/random/detail/normal_distribution_base.h b/thrust/random/detail/normal_distribution_base.h index 01d833bb1..d6e0ead96 100644 --- a/thrust/random/detail/normal_distribution_base.h +++ b/thrust/random/detail/normal_distribution_base.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ template { protected: template - __host__ __device__ + THRUST_HOST_DEVICE RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) { using uint_type = typename UniformRandomNumberGenerator::result_type; @@ -77,7 +77,7 @@ template } // no-op - __host__ __device__ + THRUST_HOST_DEVICE void reset() {} }; @@ -102,7 +102,7 @@ template // note that we promise to call this member function with the same mean and stddev template - __host__ __device__ + THRUST_HOST_DEVICE RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) { // implementation from Boost diff --git a/thrust/random/detail/random_core_access.h b/thrust/random/detail/random_core_access.h index a3e34e02b..572c6603e 100644 --- a/thrust/random/detail/random_core_access.h +++ b/thrust/random/detail/random_core_access.h @@ -42,7 +42,7 @@ static IStream &stream_in(IStream &is, EngineOrDistribution &x) } template -__host__ __device__ +THRUST_HOST_DEVICE static bool equal(const EngineOrDistribution &lhs, const EngineOrDistribution &rhs) { return lhs.equal(rhs); diff --git a/thrust/random/detail/subtract_with_carry_engine.inl b/thrust/random/detail/subtract_with_carry_engine.inl index 21c22fe77..50b837721 100644 --- a/thrust/random/detail/subtract_with_carry_engine.inl +++ 
b/thrust/random/detail/subtract_with_carry_engine.inl @@ -30,7 +30,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE subtract_with_carry_engine ::subtract_with_carry_engine(result_type value) { @@ -39,7 +39,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void subtract_with_carry_engine ::seed(result_type value) { @@ -58,7 +58,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename subtract_with_carry_engine::result_type subtract_with_carry_engine ::operator()(void) @@ -90,7 +90,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void subtract_with_carry_engine ::discard(unsigned long long z) { @@ -150,7 +150,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool subtract_with_carry_engine ::equal(const subtract_with_carry_engine &rhs) const { @@ -190,7 +190,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const subtract_with_carry_engine &lhs, const subtract_with_carry_engine &rhs) { @@ -199,7 +199,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool operator!=(const subtract_with_carry_engine &lhs, const subtract_with_carry_engine &rhs) { diff --git a/thrust/random/detail/uniform_int_distribution.inl b/thrust/random/detail/uniform_int_distribution.inl index 064bfcc73..199ce135b 100644 --- a/thrust/random/detail/uniform_int_distribution.inl +++ b/thrust/random/detail/uniform_int_distribution.inl @@ -29,7 +29,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE uniform_int_distribution ::uniform_int_distribution(IntType a, IntType b) :m_param(a,b) @@ -38,7 +38,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE uniform_int_distribution ::uniform_int_distribution(const param_type &parm) :m_param(parm) @@ -47,7 +47,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void uniform_int_distribution ::reset(void) { @@ -56,7 +56,7 @@ template template template - __host__ __device__ + 
THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::operator()(UniformRandomNumberGenerator &urng) @@ -67,7 +67,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::operator()(UniformRandomNumberGenerator &urng, const param_type &parm) @@ -90,7 +90,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::a(void) const @@ -100,7 +100,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::b(void) const @@ -110,7 +110,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::param_type uniform_int_distribution ::param(void) const @@ -120,7 +120,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE void uniform_int_distribution ::param(const param_type &parm) { @@ -129,7 +129,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -139,7 +139,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_int_distribution::result_type uniform_int_distribution ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -149,7 +149,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool uniform_int_distribution ::equal(const uniform_int_distribution &rhs) const { @@ -206,7 +206,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const uniform_int_distribution &lhs, const uniform_int_distribution &rhs) { @@ -215,7 +215,7 @@ bool operator==(const uniform_int_distribution &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const uniform_int_distribution &lhs, const uniform_int_distribution &rhs) { diff --git 
a/thrust/random/detail/uniform_real_distribution.inl b/thrust/random/detail/uniform_real_distribution.inl index 119f82c1e..77081fc31 100644 --- a/thrust/random/detail/uniform_real_distribution.inl +++ b/thrust/random/detail/uniform_real_distribution.inl @@ -27,7 +27,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE uniform_real_distribution ::uniform_real_distribution(RealType a, RealType b) :m_param(a,b) @@ -35,7 +35,7 @@ template } // end uniform_real_distribution::uniform_real_distribution() template - __host__ __device__ + THRUST_HOST_DEVICE uniform_real_distribution ::uniform_real_distribution(const param_type &parm) :m_param(parm) @@ -43,7 +43,7 @@ template } // end uniform_real_distribution::uniform_real_distribution() template - __host__ __device__ + THRUST_HOST_DEVICE void uniform_real_distribution ::reset(void) { @@ -51,7 +51,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::operator()(UniformRandomNumberGenerator &urng) @@ -61,7 +61,7 @@ template template template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::operator()(UniformRandomNumberGenerator &urng, @@ -80,7 +80,7 @@ template } // end uniform_real::operator()() template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::a(void) const @@ -89,7 +89,7 @@ template } // end uniform_real::a() template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::b(void) const @@ -98,7 +98,7 @@ template } // end uniform_real_distribution::b() template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::param_type uniform_real_distribution ::param(void) const @@ -107,7 +107,7 @@ template } // end uniform_real_distribution::param() template - __host__ __device__ + 
THRUST_HOST_DEVICE void uniform_real_distribution ::param(const param_type &parm) { @@ -115,7 +115,7 @@ template } // end uniform_real_distribution::param() template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -124,7 +124,7 @@ template } // end uniform_real_distribution::min() template - __host__ __device__ + THRUST_HOST_DEVICE typename uniform_real_distribution::result_type uniform_real_distribution ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const @@ -134,7 +134,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool uniform_real_distribution ::equal(const uniform_real_distribution &rhs) const { @@ -191,7 +191,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const uniform_real_distribution &lhs, const uniform_real_distribution &rhs) { @@ -200,7 +200,7 @@ bool operator==(const uniform_real_distribution &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const uniform_real_distribution &lhs, const uniform_real_distribution &rhs) { diff --git a/thrust/random/detail/xor_combine_engine.inl b/thrust/random/detail/xor_combine_engine.inl index c94821443..146b4e4b3 100644 --- a/thrust/random/detail/xor_combine_engine.inl +++ b/thrust/random/detail/xor_combine_engine.inl @@ -28,7 +28,7 @@ namespace random template - __host__ __device__ + THRUST_HOST_DEVICE xor_combine_engine ::xor_combine_engine(void) :m_b1(),m_b2() @@ -37,7 +37,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE xor_combine_engine ::xor_combine_engine(const base1_type &urng1, const base2_type &urng2) :m_b1(urng1),m_b2(urng2) @@ -46,7 +46,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE xor_combine_engine ::xor_combine_engine(result_type s) :m_b1(s),m_b2(s) @@ -55,7 +55,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE void xor_combine_engine ::seed(void) { @@ -65,7 +65,7 @@ template - 
__host__ __device__ + THRUST_HOST_DEVICE void xor_combine_engine ::seed(result_type s) { @@ -75,7 +75,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE const typename xor_combine_engine::base1_type & xor_combine_engine ::base1(void) const @@ -85,7 +85,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE const typename xor_combine_engine::base2_type & xor_combine_engine ::base2(void) const @@ -95,7 +95,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE typename xor_combine_engine::result_type xor_combine_engine ::operator()(void) @@ -106,7 +106,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE void xor_combine_engine ::discard(unsigned long long z) { @@ -166,7 +166,7 @@ template template - __host__ __device__ + THRUST_HOST_DEVICE bool xor_combine_engine ::equal(const xor_combine_engine &rhs) const { @@ -195,7 +195,7 @@ operator>>(std::basic_istream &is, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const xor_combine_engine &lhs, const xor_combine_engine &rhs) { @@ -204,7 +204,7 @@ bool operator==(const xor_combine_engine &lhs, template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const xor_combine_engine &lhs, const xor_combine_engine &rhs) { diff --git a/thrust/random/discard_block_engine.h b/thrust/random/discard_block_engine.h index dc1ca91f5..493290861 100644 --- a/thrust/random/discard_block_engine.h +++ b/thrust/random/discard_block_engine.h @@ -110,7 +110,7 @@ template /*! This constructor constructs a new \p discard_block_engine and constructs * its \p base_type engine using its null constructor. */ - __host__ __device__ + THRUST_HOST_DEVICE discard_block_engine(); /*! This constructor constructs a new \p discard_block_engine using @@ -119,20 +119,20 @@ template * \param urng A \p base_type to use to initialize this \p discard_block_engine's * adapted base engine. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit discard_block_engine(const base_type &urng); /*! 
This constructor initializes a new \p discard_block_engine with a given seed. * * \param s The seed used to intialize this \p discard_block_engine's adapted base engine. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit discard_block_engine(result_type s); /*! This method initializes the state of this \p discard_block_engine's adapted base engine * by using its \p default_seed value. */ - __host__ __device__ + THRUST_HOST_DEVICE void seed(void); /*! This method initializes the state of this \p discard_block_engine's adapted base engine @@ -140,7 +140,7 @@ template * * \param s The seed with which to intialize this \p discard_block_engine's adapted base engine. */ - __host__ __device__ + THRUST_HOST_DEVICE void seed(result_type s); // generating functions @@ -148,7 +148,7 @@ template /*! This member function produces a new random value and updates this \p discard_block_engine's state. * \return A new random number. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(void); /*! This member function advances this \p discard_block_engine's state a given number of times @@ -157,7 +157,7 @@ template * \param z The number of random values to discard. * \note This function is provided because an implementation may be able to accelerate it. */ - __host__ __device__ + THRUST_HOST_DEVICE void discard(unsigned long long z); // property functions @@ -167,7 +167,7 @@ template * * \return A const reference to the base engine this \p discard_block_engine adapts. */ - __host__ __device__ + THRUST_HOST_DEVICE const base_type &base(void) const; /*! \cond @@ -178,7 +178,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const discard_block_engine &rhs) const; template @@ -197,7 +197,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const discard_block_engine &lhs, const discard_block_engine &rhs); @@ -208,7 +208,7 @@ bool operator==(const discard_block_engine &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const discard_block_engine &lhs, const discard_block_engine &rhs); diff --git a/thrust/random/linear_congruential_engine.h b/thrust/random/linear_congruential_engine.h index dac03d90e..13da4e812 100644 --- a/thrust/random/linear_congruential_engine.h +++ b/thrust/random/linear_congruential_engine.h @@ -152,7 +152,7 @@ template * * \param s The seed used to intialize this \p linear_congruential_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit linear_congruential_engine(result_type s = default_seed); /*! This method initializes this \p linear_congruential_engine's state, and optionally accepts @@ -160,7 +160,7 @@ template * * \param s The seed used to initializes this \p linear_congruential_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE void seed(result_type s = default_seed); // generating functions @@ -168,7 +168,7 @@ template /*! This member function produces a new random value and updates this \p linear_congruential_engine's state. * \return A new random number. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(void); /*! This member function advances this \p linear_congruential_engine's state a given number of times @@ -177,7 +177,7 @@ template * \param z The number of random values to discard. * \note This function is provided because an implementation may be able to accelerate it. */ - __host__ __device__ + THRUST_HOST_DEVICE void discard(unsigned long long z); /*! 
\cond @@ -191,7 +191,7 @@ template friend struct thrust::random::detail::linear_congruential_engine_discard; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const linear_congruential_engine &rhs) const; template @@ -211,7 +211,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const linear_congruential_engine &lhs, const linear_congruential_engine &rhs); @@ -222,7 +222,7 @@ bool operator==(const linear_congruential_engine &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const linear_congruential_engine &lhs, const linear_congruential_engine &rhs); diff --git a/thrust/random/linear_feedback_shift_engine.h b/thrust/random/linear_feedback_shift_engine.h index a46c6d8ab..c6b9b4b3c 100644 --- a/thrust/random/linear_feedback_shift_engine.h +++ b/thrust/random/linear_feedback_shift_engine.h @@ -119,7 +119,7 @@ template * * \param value The seed used to intialize this \p linear_feedback_shift_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit linear_feedback_shift_engine(result_type value = default_seed); /*! This method initializes this \p linear_feedback_shift_engine's state, and optionally accepts @@ -127,7 +127,7 @@ template * * \param value The seed used to initializes this \p linear_feedback_shift_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE void seed(result_type value = default_seed); // generating functions @@ -135,7 +135,7 @@ template /*! This member function produces a new random value and updates this \p linear_feedback_shift_engine's state. * \return A new random number. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(void); /*! 
This member function advances this \p linear_feedback_shift_engine's state a given number of times @@ -144,7 +144,7 @@ template * \param z The number of random values to discard. * \note This function is provided because an implementation may be able to accelerate it. */ - __host__ __device__ + THRUST_HOST_DEVICE void discard(unsigned long long z); /*! \cond @@ -154,7 +154,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const linear_feedback_shift_engine &rhs) const; template @@ -174,7 +174,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const linear_feedback_shift_engine &lhs, const linear_feedback_shift_engine &rhs); @@ -185,7 +185,7 @@ bool operator==(const linear_feedback_shift_engine &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const linear_feedback_shift_engine &lhs, const linear_feedback_shift_engine &rhs); diff --git a/thrust/random/normal_distribution.h b/thrust/random/normal_distribution.h index 36b985cb6..04bec24e2 100644 --- a/thrust/random/normal_distribution.h +++ b/thrust/random/normal_distribution.h @@ -105,7 +105,7 @@ template * \param mean The mean (expected value) of the distribution. Defaults to \c 0.0. * \param stddev The standard deviation of the distribution. Defaults to \c 1.0. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit normal_distribution(RealType mean = 0.0, RealType stddev = 1.0); /*! This constructor creates a new \p normal_distribution from a \p param_type object @@ -113,14 +113,14 @@ template * * \param parm A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of the distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit normal_distribution(const param_type &parm); /*! 
Calling this member function guarantees that subsequent uses of this * \p normal_distribution do not depend on values produced by any random * number generator prior to invoking this function. */ - __host__ __device__ + THRUST_HOST_DEVICE void reset(void); // generating functions @@ -131,7 +131,7 @@ template * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng); /*! This method produces a new Normal random integer as if by creating a new \p normal_distribution @@ -143,7 +143,7 @@ template * to draw from. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); // property functions @@ -153,7 +153,7 @@ template * * \return The mean (expected value) of this \p normal_distribution's output. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type mean(void) const; /*! This method returns the value of the parameter with which this \p normal_distribution @@ -161,7 +161,7 @@ template * * \return The standard deviation of this \p uniform_real_distribution's output. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type stddev(void) const; /*! This method returns a \p param_type object encapsulating the parameters with which this @@ -169,7 +169,7 @@ template * * \return A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of this \p normal_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE param_type param(void) const; /*! This method changes the parameters of this \p normal_distribution using the values encapsulated @@ -177,21 +177,21 @@ template * * \param parm A \p param_type object encapsulating the new parameters (i.e., the mean and variance) of this \p normal_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE void param(const param_type &parm); /*! 
This method returns the smallest floating point number this \p normal_distribution can potentially produce. * * \return The lower bound of this \p normal_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce. * * \return The upper bound of this \p normal_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! \cond @@ -201,7 +201,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const normal_distribution &rhs) const; template @@ -220,7 +220,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const normal_distribution &lhs, const normal_distribution &rhs); @@ -231,7 +231,7 @@ bool operator==(const normal_distribution &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const normal_distribution &lhs, const normal_distribution &rhs); diff --git a/thrust/random/subtract_with_carry_engine.h b/thrust/random/subtract_with_carry_engine.h index 69ee841fd..4afe74d5e 100644 --- a/thrust/random/subtract_with_carry_engine.h +++ b/thrust/random/subtract_with_carry_engine.h @@ -113,7 +113,7 @@ template * * \param value The seed used to intialize this \p subtract_with_carry_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit subtract_with_carry_engine(result_type value = default_seed); /*! 
This method initializes this \p subtract_with_carry_engine's state, and optionally accepts @@ -121,7 +121,7 @@ template * * \param value The seed used to initializes this \p subtract_with_carry_engine's state. */ - __host__ __device__ + THRUST_HOST_DEVICE void seed(result_type value = default_seed); // generating functions @@ -129,7 +129,7 @@ template /*! This member function produces a new random value and updates this \p subtract_with_carry_engine's state. * \return A new random number. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(void); /*! This member function advances this \p subtract_with_carry_engine's state a given number of times @@ -138,7 +138,7 @@ template * \param z The number of random values to discard. * \note This function is provided because an implementation may be able to accelerate it. */ - __host__ __device__ + THRUST_HOST_DEVICE void discard(unsigned long long z); /*! \cond @@ -150,7 +150,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const subtract_with_carry_engine &rhs) const; template @@ -170,7 +170,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const subtract_with_carry_engine &lhs, const subtract_with_carry_engine &rhs); @@ -181,7 +181,7 @@ bool operator==(const subtract_with_carry_engine &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const subtract_with_carry_engine&lhs, const subtract_with_carry_engine&rhs); diff --git a/thrust/random/uniform_int_distribution.h b/thrust/random/uniform_int_distribution.h index 18f369fc2..5c7ce1479 100644 --- a/thrust/random/uniform_int_distribution.h +++ b/thrust/random/uniform_int_distribution.h @@ -108,7 +108,7 @@ template * \param b The largest integer to potentially produce. 
Defaults to the largest representable integer in * the platform. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit uniform_int_distribution(IntType a = 0, IntType b = THRUST_NS_QUALIFIER::detail::integer_traits::const_max); @@ -117,12 +117,12 @@ template * * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit uniform_int_distribution(const param_type &parm); /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. */ - __host__ __device__ + THRUST_HOST_DEVICE void reset(void); // generating functions @@ -133,7 +133,7 @@ template * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng); /*! This method produces a new uniform random integer as if by creating a new \p uniform_int_distribution @@ -145,7 +145,7 @@ template * to draw from. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); // property functions @@ -155,7 +155,7 @@ template * * \return The lower bound of this \p uniform_int_distribution's range. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type a(void) const; /*! This method returns the value of the parameter with which this \p uniform_int_distribution @@ -163,7 +163,7 @@ template * * \return The upper bound of this \p uniform_int_distribution's range. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type b(void) const; /*! This method returns a \p param_type object encapsulating the parameters with which this @@ -171,7 +171,7 @@ template * * \return A \p param_type object enapsulating the range of this \p uniform_int_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE param_type param(void) const; /*! 
This method changes the parameters of this \p uniform_int_distribution using the values encapsulated @@ -179,21 +179,21 @@ template * * \param parm A \p param_type object encapsulating the new range of this \p uniform_int_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE void param(const param_type &parm); /*! This method returns the smallest integer this \p uniform_int_distribution can potentially produce. * * \return The lower bound of this \p uniform_int_distribution's range. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! This method returns the largest integer this \p uniform_int_distribution can potentially produce. * * \return The upper bound of this \p uniform_int_distribution's range. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! \cond @@ -203,7 +203,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const uniform_int_distribution &rhs) const; template @@ -222,7 +222,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const uniform_int_distribution &lhs, const uniform_int_distribution &rhs); @@ -233,7 +233,7 @@ bool operator==(const uniform_int_distribution &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const uniform_int_distribution &lhs, const uniform_int_distribution &rhs); diff --git a/thrust/random/uniform_real_distribution.h b/thrust/random/uniform_real_distribution.h index e6c5a7d88..e4250f9fc 100644 --- a/thrust/random/uniform_real_distribution.h +++ b/thrust/random/uniform_real_distribution.h @@ -106,7 +106,7 @@ template * \param a The smallest floating point number to potentially produce. Defaults to \c 0.0. 
* \param b The smallest number larger than the largest floating point number to potentially produce. Defaults to \c 1.0. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit uniform_real_distribution(RealType a = 0.0, RealType b = 1.0); /*! This constructor creates a new \p uniform_real_distribution from a \p param_type object @@ -114,12 +114,12 @@ template * * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE explicit uniform_real_distribution(const param_type &parm); /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. */ - __host__ __device__ + THRUST_HOST_DEVICE void reset(void); // generating functions @@ -130,7 +130,7 @@ template * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng); /*! This method produces a new uniform random integer as if by creating a new \p uniform_real_distribution @@ -142,7 +142,7 @@ template * to draw from. */ template - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); // property functions @@ -152,7 +152,7 @@ template * * \return The lower bound of this \p uniform_real_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type a(void) const; /*! This method returns the value of the parameter with which this \p uniform_real_distribution @@ -160,7 +160,7 @@ template * * \return The upper bound of this \p uniform_real_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type b(void) const; /*! 
This method returns a \p param_type object encapsulating the parameters with which this @@ -168,7 +168,7 @@ template * * \return A \p param_type object enapsulating the half-open interval of this \p uniform_real_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE param_type param(void) const; /*! This method changes the parameters of this \p uniform_real_distribution using the values encapsulated @@ -176,21 +176,21 @@ template * * \param parm A \p param_type object encapsulating the new half-open interval of this \p uniform_real_distribution. */ - __host__ __device__ + THRUST_HOST_DEVICE void param(const param_type &parm); /*! This method returns the smallest floating point number this \p uniform_real_distribution can potentially produce. * * \return The lower bound of this \p uniform_real_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce. * * \return The upper bound of this \p uniform_real_distribution's half-open interval. */ - __host__ __device__ + THRUST_HOST_DEVICE result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; /*! \cond @@ -200,7 +200,7 @@ template friend struct thrust::random::detail::random_core_access; - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const uniform_real_distribution &rhs) const; template @@ -219,7 +219,7 @@ template * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const uniform_real_distribution &lhs, const uniform_real_distribution &rhs); @@ -230,7 +230,7 @@ bool operator==(const uniform_real_distribution &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. 
*/ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const uniform_real_distribution &lhs, const uniform_real_distribution &rhs); diff --git a/thrust/random/xor_combine_engine.h b/thrust/random/xor_combine_engine.h index 321f04033..9ec42f517 100644 --- a/thrust/random/xor_combine_engine.h +++ b/thrust/random/xor_combine_engine.h @@ -117,7 +117,7 @@ template @@ -215,7 +215,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator==(const xor_combine_engine &lhs, const xor_combine_engine &rhs); @@ -226,7 +226,7 @@ bool operator==(const xor_combine_engine &lhs, * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. */ template -__host__ __device__ +THRUST_HOST_DEVICE bool operator!=(const xor_combine_engine &lhs, const xor_combine_engine &rhs); diff --git a/thrust/reduce.h b/thrust/reduce.h index cd3d00a72..cb1941796 100644 --- a/thrust/reduce.h +++ b/thrust/reduce.h @@ -78,7 +78,7 @@ THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/algorithm/accumulate */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::value_type reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last); @@ -172,7 +172,7 @@ template typename * \see https://en.cppreference.com/w/cpp/algorithm/accumulate */ template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -280,7 +280,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -404,7 +404,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -543,7 +543,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, 
@@ -696,7 +696,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, diff --git a/thrust/remove.h b/thrust/remove.h index a57fcf211..e74a93d0d 100644 --- a/thrust/remove.h +++ b/thrust/remove.h @@ -93,7 +93,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -210,7 +210,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -303,7 +303,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -376,7 +376,7 @@ __host__ __device__ * ... * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; @@ -454,7 +454,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -517,7 +517,7 @@ __host__ __device__ * ... 
* struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(const int x) * { * return (x % 2) == 0; @@ -604,7 +604,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -727,7 +727,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/replace.h b/thrust/replace.h index 68d559835..767024246 100644 --- a/thrust/replace.h +++ b/thrust/replace.h @@ -83,7 +83,7 @@ THRUST_NAMESPACE_BEGIN * \see \c replace_copy_if */ template -__host__ __device__ +THRUST_HOST_DEVICE void replace(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, const T &old_value, @@ -169,7 +169,7 @@ template * ... * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -197,7 +197,7 @@ template * \see \c replace_copy_if */ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, Predicate pred, @@ -230,7 +230,7 @@ __host__ __device__ * ... 
* struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -298,7 +298,7 @@ template * * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -331,7 +331,7 @@ template * \see \c replace_copy_if */ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, InputIterator stencil, @@ -368,7 +368,7 @@ __host__ __device__ * * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -460,7 +460,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -556,7 +556,7 @@ template * * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -585,7 +585,7 @@ template * \see \c replace_copy */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -623,7 +623,7 @@ __host__ __device__ * * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; @@ -696,7 +696,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, InputIterator2 stencil, @@ -771,7 +771,7 @@ __host__ __device__ * * struct is_less_than_zero * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x < 0; diff --git a/thrust/reverse.h b/thrust/reverse.h index 6bda425cc..3754f481f 100644 --- a/thrust/reverse.h +++ b/thrust/reverse.h @@ -66,7 +66,7 @@ 
THRUST_NAMESPACE_BEGIN * \see \p reverse_iterator */ template -__host__ __device__ +THRUST_HOST_DEVICE void reverse(const thrust::detail::execution_policy_base &exec, BidirectionalIterator first, BidirectionalIterator last); @@ -151,7 +151,7 @@ template * \see \p reverse_iterator */ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, BidirectionalIterator first, BidirectionalIterator last, diff --git a/thrust/scan.h b/thrust/scan.h index a13d7e002..9499ce641 100644 --- a/thrust/scan.h +++ b/thrust/scan.h @@ -95,7 +95,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -214,7 +214,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -331,7 +331,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -441,7 +441,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -564,7 +564,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -706,7 +706,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -848,7 +848,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1004,7 +1004,7 @@ 
template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1146,7 +1146,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1263,7 +1263,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1391,7 +1391,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1549,7 +1549,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/scatter.h b/thrust/scatter.h index b8b0bd84f..f9dcee2eb 100644 --- a/thrust/scatter.h +++ b/thrust/scatter.h @@ -88,7 +88,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -203,7 +203,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -304,7 +304,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, @@ -376,7 +376,7 @@ __host__ __device__ * * struct is_even * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return (x % 2) == 0; diff --git a/thrust/sequence.h b/thrust/sequence.h index fb3959e3c..f0f4ec302 100644 --- a/thrust/sequence.h +++ b/thrust/sequence.h @@ -67,7 +67,7 @@ 
THRUST_NAMESPACE_BEGIN * \see https://en.cppreference.com/w/cpp/algorithm/iota */ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -148,7 +148,7 @@ template * \see https://en.cppreference.com/w/cpp/algorithm/iota */ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -235,7 +235,7 @@ template * \see https://en.cppreference.com/w/cpp/algorithm/iota */ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/set_operations.h b/thrust/set_operations.h index 65a48d1b6..3828691cc 100644 --- a/thrust/set_operations.h +++ b/thrust/set_operations.h @@ -103,7 +103,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -252,7 +252,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -410,7 +410,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -574,7 +574,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -737,7 +737,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -896,7 +896,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator 
set_symmetric_difference(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1055,7 +1055,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1208,7 +1208,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, @@ -1383,7 +1383,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -1588,7 +1588,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -1795,7 +1795,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -1998,7 +1998,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -2206,7 +2206,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -2417,7 +2417,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -2628,7 +2628,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first1, @@ -2835,7 +2835,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(const thrust::detail::execution_policy_base &exec, InputIterator1 
keys_first1, diff --git a/thrust/shuffle.h b/thrust/shuffle.h index 52fc2d5e6..ed9b8d1c9 100644 --- a/thrust/shuffle.h +++ b/thrust/shuffle.h @@ -70,7 +70,7 @@ THRUST_NAMESPACE_BEGIN * \see \p shuffle_copy */ template -__host__ __device__ void shuffle( +THRUST_HOST_DEVICE void shuffle( const thrust::detail::execution_policy_base& exec, RandomIterator first, RandomIterator last, URBG&& g); @@ -99,7 +99,7 @@ __host__ __device__ void shuffle( * \see \p shuffle_copy */ template -__host__ __device__ void shuffle(RandomIterator first, RandomIterator last, +THRUST_HOST_DEVICE void shuffle(RandomIterator first, RandomIterator last, URBG&& g); /*! shuffle_copy differs from shuffle only in that the reordered sequence is written to different output sequences, rather than in place. @@ -137,7 +137,7 @@ __host__ __device__ void shuffle(RandomIterator first, RandomIterator last, */ template -__host__ __device__ void shuffle_copy( +THRUST_HOST_DEVICE void shuffle_copy( const thrust::detail::execution_policy_base& exec, RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g); @@ -170,7 +170,7 @@ __host__ __device__ void shuffle_copy( * \see \p shuffle */ template -__host__ __device__ void shuffle_copy(RandomIterator first, RandomIterator last, +THRUST_HOST_DEVICE void shuffle_copy(RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g); THRUST_NAMESPACE_END diff --git a/thrust/sort.h b/thrust/sort.h index 5cf9d6217..a298bee64 100644 --- a/thrust/sort.h +++ b/thrust/sort.h @@ -73,7 +73,7 @@ THRUST_NAMESPACE_BEGIN * \see \p sort_by_key */ template -__host__ __device__ +THRUST_HOST_DEVICE void sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last); @@ -165,7 +165,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE void sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -212,7 +212,7 @@ __host__ __device__ */ template 
-__host__ __device__ +THRUST_HOST_DEVICE void sort(RandomAccessIterator first, RandomAccessIterator last, StrictWeakOrdering comp); @@ -263,7 +263,7 @@ __host__ __device__ * \see \p stable_sort_by_key */ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last); @@ -363,7 +363,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(const thrust::detail::execution_policy_base &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -481,7 +481,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -603,7 +603,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -730,7 +730,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -857,7 +857,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -995,7 +995,7 @@ template */ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -1110,7 +1110,7 @@ template * \see \c less */ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -1211,7 +1211,7 @@ template * \see \p stable_sort_by_key */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(const 
thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -1299,7 +1299,7 @@ template * \see \p stable_sort_by_key */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/swap.h b/thrust/swap.h index d8a8be73c..54236130b 100644 --- a/thrust/swap.h +++ b/thrust/swap.h @@ -58,7 +58,7 @@ THRUST_NAMESPACE_BEGIN * \endcode */ template -__host__ __device__ +THRUST_HOST_DEVICE inline void swap(Assignable1 &a, Assignable2 &b); /*! \} // swap @@ -122,7 +122,7 @@ inline void swap(Assignable1 &a, Assignable2 &b); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, ForwardIterator1 first1, ForwardIterator1 last1, diff --git a/thrust/system/cpp/detail/par.h b/thrust/system/cpp/detail/par.h index 2bfc40d73..ccac29048 100644 --- a/thrust/system/cpp/detail/par.h +++ b/thrust/system/cpp/detail/par.h @@ -33,7 +33,7 @@ struct par_t : thrust::system::cpp::detail::execution_policy, thrust::detail::allocator_aware_execution_policy< thrust::system::cpp::detail::execution_policy> { - __host__ __device__ + THRUST_HOST_DEVICE constexpr par_t() : thrust::system::cpp::detail::execution_policy() {} }; diff --git a/thrust/system/cpp/execution_policy.h b/thrust/system/cpp/execution_policy.h index 0d8a9a367..46eeb7d03 100644 --- a/thrust/system/cpp/execution_policy.h +++ b/thrust/system/cpp/execution_policy.h @@ -126,7 +126,7 @@ struct tag : thrust::system::cpp::execution_policy { unspecified }; * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); diff --git a/thrust/system/cuda/config.h b/thrust/system/cuda/config.h index 2f9db3e0c..3b5c4960d 100644 --- a/thrust/system/cuda/config.h +++ b/thrust/system/cuda/config.h @@ -95,18 +95,19 @@ #ifdef 
THRUST_AGENT_ENTRY_NOINLINE #define THRUST_AGENT_ENTRY_INLINE_ATTR __noinline__ #else -#define THRUST_AGENT_ENTRY_INLINE_ATTR __forceinline__ +#define THRUST_AGENT_ENTRY_INLINE_ATTR _CCCL_FORCEINLINE #endif -#define THRUST_DEVICE_FUNCTION __device__ __forceinline__ -#define THRUST_HOST_FUNCTION __host__ __forceinline__ -#define THRUST_FUNCTION __host__ __device__ __forceinline__ +#define THRUST_DEVICE_FUNCTION _CCCL_DEVICE _CCCL_FORCEINLINE +#define THRUST_HOST_FUNCTION _CCCL_HOST _CCCL_FORCEINLINE +#define THRUST_FUNCTION _CCCL_HOST_DEVICE _CCCL_FORCEINLINE + #if 0 #define THRUST_ARGS(...) __VA_ARGS__ #define THRUST_STRIP_PARENS(X) X #define THRUST_AGENT_ENTRY(ARGS) THRUST_FUNCTION static void entry(THRUST_STRIP_PARENS(THRUST_ARGS ARGS)) #else -#define THRUST_AGENT_ENTRY(...) THRUST_AGENT_ENTRY_INLINE_ATTR __device__ static void entry(__VA_ARGS__) +#define THRUST_AGENT_ENTRY(...) THRUST_AGENT_ENTRY_INLINE_ATTR _CCCL_DEVICE static void entry(__VA_ARGS__) #endif #ifndef THRUST_IGNORE_CUB_VERSION_CHECK diff --git a/thrust/system/cuda/detail/adjacent_difference.h b/thrust/system/cuda/detail/adjacent_difference.h index 284611235..8e3403aab 100644 --- a/thrust/system/cuda/detail/adjacent_difference.h +++ b/thrust/system/cuda/detail/adjacent_difference.h @@ -50,7 +50,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ OutputIterator +_CCCL_HOST_DEVICE OutputIterator adjacent_difference( const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -240,12 +240,12 @@ namespace __adjacent_difference { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE adjacent_difference(execution_policy &policy, InputIt first, InputIt last, @@ -269,7 +269,7 @@ adjacent_difference(execution_policy &policy, template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE adjacent_difference(execution_policy &policy, InputIt first, InputIt last, 
diff --git a/thrust/system/cuda/detail/assign_value.h b/thrust/system/cuda/detail/assign_value.h index 8945f1cac..e1300c92d 100644 --- a/thrust/system/cuda/detail/assign_value.h +++ b/thrust/system/cuda/detail/assign_value.h @@ -31,18 +31,18 @@ namespace cuda_cub { template -inline __host__ __device__ +inline _CCCL_HOST_DEVICE void assign_value(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) { // XXX war nvbugs/881631 struct war_nvbugs_881631 { - __host__ inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) + _CCCL_HOST inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) { cuda_cub::copy(exec, src, src + 1, dst); } - __device__ inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) + _CCCL_DEVICE inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) { *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); } @@ -58,13 +58,13 @@ inline __host__ __device__ template -inline __host__ __device__ +inline _CCCL_HOST_DEVICE void assign_value(cross_system &systems, Pointer1 dst, Pointer2 src) { // XXX war nvbugs/881631 struct war_nvbugs_881631 { - __host__ inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) + _CCCL_HOST inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) { // rotate the systems so that they are ordered the same as (src, dst) // for the call to thrust::copy @@ -72,7 +72,7 @@ inline __host__ __device__ cuda_cub::copy(rotated_systems, src, src + 1, dst); } - __device__ inline static void device_path(cross_system &, Pointer1 dst, Pointer2 src) + _CCCL_DEVICE inline static void device_path(cross_system &, Pointer1 dst, Pointer2 src) { // XXX forward the true cuda::execution_policy inside systems here // instead of materializing a tag diff --git a/thrust/system/cuda/detail/async/for_each.h 
b/thrust/system/cuda/detail/async/for_each.h index d6809fe0a..f1fb0917a 100644 --- a/thrust/system/cuda/detail/async/for_each.h +++ b/thrust/system/cuda/detail/async/for_each.h @@ -58,13 +58,13 @@ struct async_for_each_fn ForwardIt first; UnaryFunction f; - __host__ __device__ + _CCCL_HOST_DEVICE async_for_each_fn(ForwardIt&& first_, UnaryFunction&& f_) : first(std::move(first_)), f(std::move(f_)) {} template - __host__ __device__ + _CCCL_HOST_DEVICE void operator()(Index idx) { f(thrust::raw_reference_cast(first[idx])); diff --git a/thrust/system/cuda/detail/async/transform.h b/thrust/system/cuda/detail/async/transform.h index a971300f2..329fb8bb9 100644 --- a/thrust/system/cuda/detail/async/transform.h +++ b/thrust/system/cuda/detail/async/transform.h @@ -59,13 +59,13 @@ struct async_transform_fn OutputIt output_; UnaryOperation op_; - __host__ __device__ + _CCCL_HOST_DEVICE async_transform_fn(ForwardIt&& first, OutputIt&& output, UnaryOperation&& op) : first_(std::move(first)), output_(std::move(output)), op_(std::move(op)) {} template - __host__ __device__ + _CCCL_HOST_DEVICE void operator()(Index idx) { output_[idx] = op_(thrust::raw_reference_cast(first_[idx])); diff --git a/thrust/system/cuda/detail/copy.h b/thrust/system/cuda/detail/copy.h index 02a5d2ac1..88ea9c450 100644 --- a/thrust/system/cuda/detail/copy.h +++ b/thrust/system/cuda/detail/copy.h @@ -38,14 +38,14 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ OutputIt +_CCCL_HOST_DEVICE OutputIt copy(const thrust::detail::execution_policy_base &exec, InputIt first, InputIt last, OutputIt result); template -__host__ __device__ OutputIt +_CCCL_HOST_DEVICE OutputIt copy_n(const thrust::detail::execution_policy_base &exec, InputIt first, Size n, @@ -57,7 +57,7 @@ namespace cuda_cub { template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy(execution_policy &system, InputIterator first, InputIterator last, @@ -67,7 +67,7 @@ template -OutputIterator __host__ +OutputIterator 
_CCCL_HOST copy(cross_system systems, InputIterator first, InputIterator last, @@ -77,7 +77,7 @@ template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy_n(execution_policy &system, InputIterator first, Size n, @@ -88,7 +88,7 @@ template -OutputIterator __host__ +OutputIterator _CCCL_HOST copy_n(cross_system systems, InputIterator first, Size n, @@ -110,11 +110,11 @@ namespace cuda_cub { #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC // D->D copy requires NVCC compiler -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy(execution_policy &system, InputIterator first, InputIterator last, @@ -127,12 +127,12 @@ copy(execution_policy &system, return result; } // end copy() -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy_n(execution_policy &system, InputIterator first, Size n, @@ -153,7 +153,7 @@ template -OutputIterator __host__ +OutputIterator _CCCL_HOST copy(cross_system systems, InputIterator first, InputIterator last, @@ -167,7 +167,7 @@ template -OutputIterator __host__ +OutputIterator _CCCL_HOST copy_n(cross_system systems, InputIterator first, Size n, diff --git a/thrust/system/cuda/detail/copy_if.h b/thrust/system/cuda/detail/copy_if.h index 5e760c086..997793640 100644 --- a/thrust/system/cuda/detail/copy_if.h +++ b/thrust/system/cuda/detail/copy_if.h @@ -49,7 +49,7 @@ THRUST_NAMESPACE_BEGIN // XXX declare generic copy_if interface // to avoid circulular dependency from thrust/copy.h template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -58,7 +58,7 @@ __host__ __device__ Predicate pred); template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, @@ -774,12 +774,12 @@ 
namespace __copy_if { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy_if(execution_policy &policy, InputIterator first, InputIterator last, @@ -800,13 +800,13 @@ copy_if(execution_policy &policy, pred);)); } // func copy_if -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIterator __host__ __device__ +OutputIterator _CCCL_HOST_DEVICE copy_if(execution_policy &policy, InputIterator first, InputIterator last, diff --git a/thrust/system/cuda/detail/core/agent_launcher.h b/thrust/system/cuda/detail/core/agent_launcher.h index 25def1ab6..e4cecb266 100644 --- a/thrust/system/cuda/detail/core/agent_launcher.h +++ b/thrust/system/cuda/detail/core/agent_launcher.h @@ -33,7 +33,6 @@ #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC #include #include -#include #include diff --git a/thrust/system/cuda/detail/core/triple_chevron_launch.h b/thrust/system/cuda/detail/core/triple_chevron_launch.h index 668c8ac9d..506eeb5ea 100644 --- a/thrust/system/cuda/detail/core/triple_chevron_launch.h +++ b/thrust/system/cuda/detail/core/triple_chevron_launch.h @@ -28,7 +28,6 @@ #include #include -#include #include @@ -56,7 +55,7 @@ namespace launcher { stream(stream_) {} template - cudaError_t __host__ + cudaError_t _CCCL_HOST doit_host(K k, Args const&... args) const { k<<>>(args...); @@ -64,16 +63,16 @@ namespace launcher { } template - size_t __device__ + size_t _CCCL_DEVICE align_up(size_t offset) const { size_t alignment = alignment_of::value; return alignment * ((offset + (alignment - 1))/ alignment); } - size_t __device__ argument_pack_size(size_t size) const { return size; } + size_t _CCCL_DEVICE argument_pack_size(size_t size) const { return size; } template - size_t __device__ + size_t _CCCL_DEVICE argument_pack_size(size_t size, Arg const& arg, Args const&... 
args) const { size = align_up(size); @@ -81,7 +80,7 @@ namespace launcher { } template - size_t __device__ copy_arg(char* buffer, size_t offset, Arg arg) const + size_t _CCCL_DEVICE copy_arg(char* buffer, size_t offset, Arg arg) const { offset = align_up(offset); for (int i = 0; i != sizeof(Arg); ++i) @@ -89,12 +88,12 @@ namespace launcher { return offset + sizeof(Arg); } - __device__ + _CCCL_DEVICE void fill_arguments(char*, size_t) const {} template - __device__ + _CCCL_DEVICE void fill_arguments(char* buffer, size_t offset, Arg const& arg, @@ -105,7 +104,7 @@ namespace launcher { #ifdef THRUST_RDC_ENABLED template - cudaError_t __device__ + cudaError_t _CCCL_DEVICE doit_device(K k, Args const&... args) const { const size_t size = argument_pack_size(0,args...); @@ -115,7 +114,7 @@ namespace launcher { } template - cudaError_t __device__ + cudaError_t _CCCL_DEVICE launch_device(K k, void* buffer) const { return cudaLaunchDevice((void*)k, @@ -127,14 +126,14 @@ namespace launcher { } #else template - cudaError_t __device__ + cudaError_t _CCCL_DEVICE doit_device(K, Args const&... ) const { return cudaErrorNotSupported; } #endif - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE template THRUST_FUNCTION cudaError_t doit(K k, Args const&... 
args) const diff --git a/thrust/system/cuda/detail/core/util.h b/thrust/system/cuda/detail/core/util.h index a0cc01d6f..c034dae4b 100644 --- a/thrust/system/cuda/detail/core/util.h +++ b/thrust/system/cuda/detail/core/util.h @@ -386,10 +386,10 @@ namespace core { template void __global__ get_agent_plan_kernel(AgentPlan *plan); - static __device__ AgentPlan agent_plan_device; + static _CCCL_DEVICE AgentPlan agent_plan_device; template - AgentPlan __device__ get_agent_plan_dev() + AgentPlan _CCCL_DEVICE get_agent_plan_dev() { AgentPlan plan; plan.block_threads = Agent::ptx_plan::BLOCK_THREADS; @@ -400,7 +400,7 @@ namespace core { } template - AgentPlan __host__ __device__ __forceinline__ + AgentPlan _CCCL_HOST_DEVICE _CCCL_FORCEINLINE xget_agent_plan_impl(F f, cudaStream_t s, void* d_ptr) { AgentPlan plan; @@ -527,21 +527,21 @@ namespace core { }; // struct Iterator template - typename LoadIterator::type __device__ __forceinline__ + typename LoadIterator::type _CCCL_DEVICE _CCCL_FORCEINLINE make_load_iterator_impl(It it, thrust::detail::true_type /* is_trivial */) { return raw_pointer_cast(&*it); } template - typename LoadIterator::type __device__ __forceinline__ + typename LoadIterator::type _CCCL_DEVICE _CCCL_FORCEINLINE make_load_iterator_impl(It it, thrust::detail::false_type /* is_trivial */) { return it; } template - typename LoadIterator::type __device__ __forceinline__ + typename LoadIterator::type _CCCL_DEVICE _CCCL_FORCEINLINE make_load_iterator(PtxPlan const&, It it) { return make_load_iterator_impl( @@ -601,19 +601,19 @@ namespace core { public: cuda_optional() = default; - __host__ __device__ + _CCCL_HOST_DEVICE cuda_optional(T v, cudaError_t status = cudaSuccess) : status_(status), value_(v) {} - bool __host__ __device__ + bool _CCCL_HOST_DEVICE isValid() const { return cudaSuccess == status_; } - cudaError_t __host__ __device__ + cudaError_t _CCCL_HOST_DEVICE status() const { return status_; } - __host__ __device__ T const & + _CCCL_HOST_DEVICE T const 
& value() const { return value_; } - __host__ __device__ operator T const &() const { return value_; } + _CCCL_HOST_DEVICE operator T const &() const { return value_; } }; THRUST_RUNTIME_FUNCTION @@ -676,7 +676,7 @@ namespace core { return cub::SyncStream(stream); } - inline void __device__ sync_threadblock() + inline void _CCCL_DEVICE sync_threadblock() { cub::CTA_SYNC(); } @@ -703,12 +703,12 @@ namespace core { DeviceWord storage[WORDS]; - __host__ __device__ __forceinline__ T& get() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE T& get() { return reinterpret_cast(*this); } - __host__ __device__ __forceinline__ operator T&() { return get(); } + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE operator T&() { return get(); } }; // uninitialized_array @@ -724,12 +724,12 @@ namespace core { T data_[N]; public: - __host__ __device__ T* data() { return data_; } - __host__ __device__ const T* data() const { return data_; } - __host__ __device__ T& operator[](unsigned int idx) { return ((T*)data_)[idx]; } - __host__ __device__ T const& operator[](unsigned int idx) const { return ((T*)data_)[idx]; } - __host__ __device__ unsigned int size() const { return N; } - __host__ __device__ operator ref&() { return data_; } + _CCCL_HOST_DEVICE T* data() { return data_; } + _CCCL_HOST_DEVICE const T* data() const { return data_; } + _CCCL_HOST_DEVICE T& operator[](unsigned int idx) { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE T const& operator[](unsigned int idx) const { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE unsigned int size() const { return N; } + _CCCL_HOST_DEVICE operator ref&() { return data_; } }; @@ -746,18 +746,18 @@ namespace core { char data_[N * sizeof(T)]; public: - __host__ __device__ T* data() { return data_; } - __host__ __device__ const T* data() const { return data_; } - __host__ __device__ T& operator[](unsigned int idx) { return ((T*)data_)[idx]; } - __host__ __device__ T const& operator[](unsigned int idx) const { return ((T*)data_)[idx]; } - __host__ __device__ T& 
operator[](int idx) { return ((T*)data_)[idx]; } - __host__ __device__ T const& operator[](int idx) const { return ((T*)data_)[idx]; } - __host__ __device__ unsigned int size() const { return N; } - __host__ __device__ operator ref&() { return *reinterpret_cast(data_); } - __host__ __device__ ref& get_ref() { return (ref&)*this; } + _CCCL_HOST_DEVICE T* data() { return data_; } + _CCCL_HOST_DEVICE const T* data() const { return data_; } + _CCCL_HOST_DEVICE T& operator[](unsigned int idx) { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE T const& operator[](unsigned int idx) const { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE T& operator[](int idx) { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE T const& operator[](int idx) const { return ((T*)data_)[idx]; } + _CCCL_HOST_DEVICE unsigned int size() const { return N; } + _CCCL_HOST_DEVICE operator ref&() { return *reinterpret_cast(data_); } + _CCCL_HOST_DEVICE ref& get_ref() { return (ref&)*this; } }; - __host__ __device__ __forceinline__ size_t align_to(size_t n, size_t align) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE size_t align_to(size_t n, size_t align) { return ((n+align-1)/align) * align; } diff --git a/thrust/system/cuda/detail/count.h b/thrust/system/cuda/detail/count.h index b624f39dc..9b0d3ca21 100644 --- a/thrust/system/cuda/detail/count.h +++ b/thrust/system/cuda/detail/count.h @@ -41,7 +41,7 @@ namespace cuda_cub { template -typename iterator_traits::difference_type __host__ __device__ +typename iterator_traits::difference_type _CCCL_HOST_DEVICE count_if(execution_policy &policy, InputIt first, InputIt last, @@ -63,7 +63,7 @@ count_if(execution_policy &policy, template -typename iterator_traits::difference_type __host__ __device__ +typename iterator_traits::difference_type _CCCL_HOST_DEVICE count(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/cross_system.h b/thrust/system/cuda/detail/cross_system.h index 039531d28..956bfc24b 100644 --- 
a/thrust/system/cuda/detail/cross_system.h +++ b/thrust/system/cuda/detail/cross_system.h @@ -28,7 +28,14 @@ #include -#include +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + #include #include @@ -44,10 +51,10 @@ namespace cuda_cub { policy1 &sys1; policy2 &sys2; - inline __host__ __device__ + inline _CCCL_HOST_DEVICE cross_system(policy1 &sys1, policy2 &sys2) : sys1(sys1), sys2(sys2) {} - inline __host__ __device__ + inline _CCCL_HOST_DEVICE cross_system rotate() const { return cross_system(sys2, sys1); @@ -57,7 +64,7 @@ namespace cuda_cub { #if THRUST_CPP_DIALECT >= 2011 // Device to host. template - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE auto direction_of_copy( thrust::system::cuda::execution_policy const& , thrust::cpp::execution_policy const& @@ -70,7 +77,7 @@ namespace cuda_cub { // Host to device. template - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE auto direction_of_copy( thrust::cpp::execution_policy const& , thrust::system::cuda::execution_policy const& @@ -83,7 +90,7 @@ namespace cuda_cub { // Device to device. template - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE auto direction_of_copy( thrust::system::cuda::execution_policy const& , thrust::system::cuda::execution_policy const& @@ -96,7 +103,7 @@ namespace cuda_cub { // Device to device. 
template - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE auto direction_of_copy(execution_policy const &) THRUST_DECLTYPE_RETURNS( thrust::detail::integral_constant< @@ -105,7 +112,7 @@ namespace cuda_cub { ) template - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE auto direction_of_copy( execution_policy> const &systems ) @@ -122,7 +129,7 @@ namespace cuda_cub { typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyDeviceToHost == Direction::value > @@ -138,7 +145,7 @@ namespace cuda_cub { // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyDeviceToHost == Direction::value > @@ -153,7 +160,7 @@ namespace cuda_cub { typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyHostToDevice == Direction::value > @@ -169,7 +176,7 @@ namespace cuda_cub { // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyHostToDevice == Direction::value > @@ -184,7 +191,7 @@ namespace cuda_cub { typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyDeviceToDevice == Direction::value > @@ -200,7 +207,7 @@ namespace cuda_cub { // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - 
constexpr __host__ __device__ + constexpr _CCCL_HOST_DEVICE thrust::detail::integral_constant< bool, cudaMemcpyDeviceToDevice == Direction::value > @@ -213,7 +220,7 @@ namespace cuda_cub { // Device to host. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::cuda::execution_policy &sys1, thrust::execution_policy &) @@ -221,7 +228,7 @@ namespace cuda_cub { // Device to host. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::cuda::execution_policy const &sys1, thrust::execution_policy const &) @@ -229,7 +236,7 @@ namespace cuda_cub { // Host to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::execution_policy &, thrust::cuda::execution_policy &sys2) @@ -237,7 +244,7 @@ namespace cuda_cub { // Host to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::execution_policy const &, thrust::cuda::execution_policy const &sys2) @@ -245,7 +252,7 @@ namespace cuda_cub { // Device to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::cuda::execution_policy &sys1, thrust::cuda::execution_policy &) @@ -253,7 +260,7 @@ namespace cuda_cub { // Device to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_device_system(thrust::cuda::execution_policy const &sys1, thrust::cuda::execution_policy const &) @@ -263,7 +270,7 @@ namespace cuda_cub { // Device to host. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::cuda::execution_policy &, thrust::execution_policy &sys2) @@ -271,7 +278,7 @@ namespace cuda_cub { // Device to host. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::cuda::execution_policy const &, thrust::execution_policy const &sys2) @@ -279,7 +286,7 @@ namespace cuda_cub { // Host to device. 
template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::execution_policy &sys1, thrust::cuda::execution_policy &) @@ -287,7 +294,7 @@ namespace cuda_cub { // Host to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::execution_policy const &sys1, thrust::cuda::execution_policy const &) @@ -295,7 +302,7 @@ namespace cuda_cub { // Device to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::execution_policy &sys1, thrust::execution_policy &) @@ -303,7 +310,7 @@ namespace cuda_cub { // Device to device. template - __host__ __device__ + _CCCL_HOST_DEVICE auto select_host_system(thrust::execution_policy const &sys1, thrust::execution_policy const &) @@ -312,7 +319,7 @@ namespace cuda_cub { // Device to host. template - __host__ __device__ + _CCCL_HOST_DEVICE cross_system select_system(execution_policy const & sys1, thrust::cpp::execution_policy const &sys2) @@ -324,7 +331,7 @@ namespace cuda_cub { // Host to device. 
template - __host__ __device__ + _CCCL_HOST_DEVICE cross_system select_system(thrust::cpp::execution_policy const &sys1, execution_policy const & sys2) diff --git a/thrust/system/cuda/detail/equal.h b/thrust/system/cuda/detail/equal.h index aec608245..a9018eb1c 100644 --- a/thrust/system/cuda/detail/equal.h +++ b/thrust/system/cuda/detail/equal.h @@ -40,7 +40,7 @@ template -bool __host__ __device__ +bool _CCCL_HOST_DEVICE equal(execution_policy& policy, InputIt1 first1, InputIt1 last1, @@ -53,7 +53,7 @@ equal(execution_policy& policy, template -bool __host__ __device__ +bool _CCCL_HOST_DEVICE equal(execution_policy& policy, InputIt1 first1, InputIt1 last1, diff --git a/thrust/system/cuda/detail/error.inl b/thrust/system/cuda/detail/error.inl index e52305211..5ff7fc8c1 100644 --- a/thrust/system/cuda/detail/error.inl +++ b/thrust/system/cuda/detail/error.inl @@ -20,7 +20,6 @@ #include #include -#include THRUST_NAMESPACE_BEGIN diff --git a/thrust/system/cuda/detail/extrema.h b/thrust/system/cuda/detail/extrema.h index 4fe7ec86b..c15068c29 100644 --- a/thrust/system/cuda/detail/extrema.h +++ b/thrust/system/cuda/detail/extrema.h @@ -52,10 +52,10 @@ namespace __extrema { Predicate predicate; typedef tuple pair_type; - __host__ __device__ + _CCCL_HOST_DEVICE arg_min_f(Predicate p) : predicate(p) {} - pair_type __device__ + pair_type _CCCL_DEVICE operator()(pair_type const &lhs, pair_type const &rhs) { InputType const &rhs_value = get<0>(rhs); @@ -83,10 +83,10 @@ namespace __extrema { Predicate predicate; typedef tuple pair_type; - __host__ __device__ + _CCCL_HOST_DEVICE arg_max_f(Predicate p) : predicate(p) {} - pair_type __device__ + pair_type _CCCL_DEVICE operator()(pair_type const &lhs, pair_type const &rhs) { InputType const &rhs_value = get<0>(rhs); @@ -119,12 +119,12 @@ namespace __extrema { typedef arg_min_f arg_min_t; typedef arg_max_f arg_max_t; - __host__ __device__ + _CCCL_HOST_DEVICE arg_minmax_f(Predicate p) : predicate(p) { } - two_pairs_type __device__ + 
two_pairs_type _CCCL_DEVICE operator()(two_pairs_type const &lhs, two_pairs_type const &rhs) { pair_type const &rhs_min = get<0>(rhs); @@ -140,7 +140,7 @@ namespace __extrema { struct duplicate_tuple { - __device__ two_pairs_type + _CCCL_DEVICE two_pairs_type operator()(pair_type const &t) { return thrust::make_tuple(t, t); @@ -408,11 +408,11 @@ namespace __extrema { /// min element -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -ItemsIt __host__ __device__ +ItemsIt _CCCL_HOST_DEVICE min_element(execution_policy &policy, ItemsIt first, ItemsIt last, @@ -432,7 +432,7 @@ min_element(execution_policy &policy, template -ItemsIt __host__ __device__ +ItemsIt _CCCL_HOST_DEVICE min_element(execution_policy &policy, ItemsIt first, ItemsIt last) @@ -443,11 +443,11 @@ min_element(execution_policy &policy, /// max element -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -ItemsIt __host__ __device__ +ItemsIt _CCCL_HOST_DEVICE max_element(execution_policy &policy, ItemsIt first, ItemsIt last, @@ -467,7 +467,7 @@ max_element(execution_policy &policy, template -ItemsIt __host__ __device__ +ItemsIt _CCCL_HOST_DEVICE max_element(execution_policy &policy, ItemsIt first, ItemsIt last) @@ -478,11 +478,11 @@ max_element(execution_policy &policy, /// minmax element -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE minmax_element(execution_policy &policy, ItemsIt first, ItemsIt last, @@ -533,7 +533,7 @@ minmax_element(execution_policy &policy, template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE minmax_element(execution_policy &policy, ItemsIt first, ItemsIt last) diff --git a/thrust/system/cuda/detail/fill.h b/thrust/system/cuda/detail/fill.h index 80ea68592..2bce59cc6 100644 --- a/thrust/system/cuda/detail/fill.h +++ b/thrust/system/cuda/detail/fill.h @@ -59,7 +59,7 @@ namespace __fill { } // namespace __fill template -OutputIterator __host__ __device__ +OutputIterator 
_CCCL_HOST_DEVICE fill_n(execution_policy& policy, OutputIterator first, Size count, @@ -75,7 +75,7 @@ fill_n(execution_policy& policy, } // func fill_n template -void __host__ __device__ +void _CCCL_HOST_DEVICE fill(execution_policy& policy, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/cuda/detail/find.h b/thrust/system/cuda/detail/find.h index b7d2b748f..5059a689c 100644 --- a/thrust/system/cuda/detail/find.h +++ b/thrust/system/cuda/detail/find.h @@ -42,7 +42,7 @@ namespace cuda_cub { template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find_if(execution_policy& policy, InputIt first, InputIt last, @@ -51,7 +51,7 @@ find_if(execution_policy& policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find_if_not(execution_policy& policy, InputIt first, InputIt last, @@ -60,7 +60,7 @@ find_if_not(execution_policy& policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find(execution_policy &policy, InputIt first, InputIt last, @@ -104,7 +104,7 @@ template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find_if_n(execution_policy& policy, InputIt first, Size num_items, @@ -173,7 +173,7 @@ find_if_n(execution_policy& policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find_if(execution_policy& policy, InputIt first, InputIt last, @@ -185,7 +185,7 @@ find_if(execution_policy& policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find_if_not(execution_policy& policy, InputIt first, InputIt last, @@ -198,7 +198,7 @@ find_if_not(execution_policy& policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE find(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/future.inl b/thrust/system/cuda/detail/future.inl index f23184aae..346c02fc5 100644 --- a/thrust/system/cuda/detail/future.inl +++ b/thrust/system/cuda/detail/future.inl @@ -49,7 +49,7 @@ THRUST_INLINE_CONSTANT nonowning_t nonowning{}; struct 
marker_deleter final { - __host__ + _CCCL_HOST void operator()(CUevent_st* e) const { if (nullptr != e) @@ -69,7 +69,7 @@ private: public: /// \brief Create a new stream and construct a handle to it. When the handle /// is destroyed, the stream is destroyed. - __host__ + _CCCL_HOST unique_marker() : handle_(nullptr, marker_deleter()) { @@ -80,29 +80,29 @@ public: handle_.reset(e); } - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_marker(unique_marker const&) = delete; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_marker(unique_marker&&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_marker& operator=(unique_marker const&) = delete; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_marker& operator=(unique_marker&&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE ~unique_marker() = default; - __host__ + _CCCL_HOST auto get() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + _CCCL_HOST auto native_handle() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + _CCCL_HOST bool valid() const noexcept { return bool(handle_); } - __host__ + _CCCL_HOST bool ready() const { cudaError_t const err = cudaEventQuery(handle_.get()); @@ -116,19 +116,19 @@ public: return true; } - __host__ + _CCCL_HOST void wait() const { thrust::cuda_cub::throw_on_error(cudaEventSynchronize(handle_.get())); } - __host__ + _CCCL_HOST bool operator==(unique_marker const& other) const { return other.handle_ == handle_; } - __host__ + _CCCL_HOST bool operator!=(unique_marker const& other) const { return !(other == *this); @@ -139,7 +139,7 @@ public: struct stream_deleter final { - __host__ + _CCCL_HOST void operator()(CUstream_st* s) const { if (nullptr != s) @@ -153,15 +153,15 @@ private: bool cond_; public: - __host__ + _CCCL_HOST constexpr stream_conditional_deleter() noexcept : cond_(true) {} - __host__ + _CCCL_HOST explicit constexpr 
stream_conditional_deleter(nonowning_t) noexcept : cond_(false) {} - __host__ + _CCCL_HOST void operator()(CUstream_st* s) const { if (cond_ && nullptr != s) @@ -183,7 +183,7 @@ private: public: /// \brief Create a new stream and construct a handle to it. When the handle /// is destroyed, the stream is destroyed. - __host__ + _CCCL_HOST unique_stream() : handle_(nullptr, stream_conditional_deleter()) { @@ -196,39 +196,39 @@ public: /// \brief Construct a non-owning handle to an existing stream. When the /// handle is destroyed, the stream is not destroyed. - __host__ + _CCCL_HOST explicit unique_stream(nonowning_t, native_handle_type handle) : handle_(handle, stream_conditional_deleter(nonowning)) {} - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_stream(unique_stream const&) = delete; // GCC 10 complains if this is defaulted. See NVIDIA/thrust#1269. - __thrust_exec_check_disable__ - __host__ unique_stream(unique_stream &&o) noexcept + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST unique_stream(unique_stream &&o) noexcept : handle_(std::move(o.handle_)) {} - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_stream& operator=(unique_stream const&) = delete; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE unique_stream& operator=(unique_stream&&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE ~unique_stream() = default; - __host__ + _CCCL_HOST auto get() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + _CCCL_HOST auto native_handle() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + _CCCL_HOST bool valid() const noexcept { return bool(handle_); } - __host__ + _CCCL_HOST bool ready() const { cudaError_t const err = cudaStreamQuery(handle_.get()); @@ -242,7 +242,7 @@ public: return true; } - __host__ + _CCCL_HOST void wait() const { thrust::cuda_cub::throw_on_error( @@ -250,7 +250,7 @@ public: ); } - __host__ + _CCCL_HOST void depend_on(unique_marker& e) 
{ thrust::cuda_cub::throw_on_error( @@ -258,7 +258,7 @@ public: ); } - __host__ + _CCCL_HOST void depend_on(unique_stream& s) { if (s != *this) @@ -269,19 +269,19 @@ public: } } - __host__ + _CCCL_HOST void record(unique_marker& e) { thrust::cuda_cub::throw_on_error(cudaEventRecord(e.get(), handle_.get())); } - __host__ + _CCCL_HOST bool operator==(unique_stream const& other) const { return other.handle_ == handle_; } - __host__ + _CCCL_HOST bool operator!=(unique_stream const& other) const { return !(other == *this); @@ -327,43 +327,43 @@ struct acquired_stream final // Precondition: `device` is the current CUDA device. template -__host__ +_CCCL_HOST optional try_acquire_stream(int device, std::unique_ptr&) noexcept; // Precondition: `device` is the current CUDA device. -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int, unique_stream& stream) noexcept; // Precondition: `device` is the current CUDA device. -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int device, ready_event&) noexcept; // Precondition: `device` is the current CUDA device. template -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int device, ready_future&) noexcept; // Precondition: `device` is the current CUDA device. -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int device, unique_eager_event& parent) noexcept; // Precondition: `device` is the current CUDA device. template -__host__ +_CCCL_HOST optional try_acquire_stream(int device, unique_eager_future& parent) noexcept; template -__host__ +_CCCL_HOST acquired_stream acquire_stream(int device, Dependencies&... deps) noexcept; template -__host__ +_CCCL_HOST unique_eager_event make_dependent_event( std::tuple&& deps @@ -373,7 +373,7 @@ template < typename X, typename XPointer , typename ComputeContent, typename... 
Dependencies > -__host__ +_CCCL_HOST unique_eager_future_promise_pair make_dependent_future(ComputeContent&& cc, std::tuple&& deps); @@ -386,12 +386,12 @@ protected: public: // Constructs an `async_signal` which uses `stream`. - __host__ + _CCCL_HOST explicit async_signal(unique_stream&& stream) : stream_(std::move(stream)) {} - __host__ + _CCCL_HOST virtual ~async_signal() {} unique_stream& stream() noexcept { return stream_; } @@ -410,7 +410,7 @@ public: // Constructs an `async_keep_alives` which uses `stream`, and keeps the // objects in the tuple `keep_alives` alive until the asynchronous signal is // destroyed. - __host__ + _CCCL_HOST explicit async_keep_alives( unique_stream&& stream, keep_alives_type&& keep_alives ) @@ -418,7 +418,7 @@ public: , keep_alives_(std::move(keep_alives)) {} - __host__ + _CCCL_HOST virtual ~async_keep_alives() {} }; @@ -429,24 +429,24 @@ struct async_value : virtual async_signal using raw_const_pointer = value_type const*; // Constructs an `async_value` which uses `stream` and has no content. - __host__ + _CCCL_HOST explicit async_value(unique_stream stream) : async_signal(std::move(stream)) {} - __host__ + _CCCL_HOST virtual ~async_value() {} - __host__ + _CCCL_HOST virtual bool valid_content() const noexcept { return false; } - __host__ + _CCCL_HOST virtual value_type get() { throw thrust::event_error(event_errc::no_state); } - __host__ + _CCCL_HOST virtual value_type extract() { throw thrust::event_error(event_errc::no_state); @@ -454,7 +454,7 @@ struct async_value : virtual async_signal // For testing only. #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) - __host__ + _CCCL_HOST virtual raw_const_pointer raw_data() const { return nullptr; @@ -496,7 +496,7 @@ public: // in generated host code, which leads to -Wreorder warnings. 
THRUST_DISABLE_CLANG_AND_GCC_INITIALIZER_REORDERING_WARNING_BEGIN template - __host__ + _CCCL_HOST explicit async_addressable_value_with_keep_alives( unique_stream&& stream , keep_alives_type&& keep_alives @@ -512,15 +512,15 @@ public: } THRUST_DISABLE_CLANG_AND_GCC_INITIALIZER_REORDERING_WARNING_END - __host__ + _CCCL_HOST bool valid_content() const noexcept final override { return nullptr != content_; } // Precondition: `true == valid_content()`. - __host__ - pointer data() + _CCCL_HOST + pointer data() { if (!valid_content()) throw thrust::event_error(event_errc::no_content); @@ -529,8 +529,8 @@ public: } // Precondition: `true == valid_content()`. - __host__ - const_pointer data() const + _CCCL_HOST + const_pointer data() const { if (!valid_content()) throw thrust::event_error(event_errc::no_content); @@ -540,7 +540,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - __host__ + _CCCL_HOST value_type get() final override { this->stream().wait(); @@ -549,7 +549,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - __host__ + _CCCL_HOST value_type extract() final override { this->stream().wait(); @@ -558,7 +558,7 @@ public: // For testing only. 
#if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) - __host__ + _CCCL_HOST raw_const_pointer raw_data() const final override { return raw_pointer_cast(content_); @@ -589,20 +589,20 @@ private: {} public: - __host__ __device__ + _CCCL_HOST_DEVICE weak_promise() : device_(0), content_{} {} - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE weak_promise(weak_promise const&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE weak_promise(weak_promise&&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE weak_promise& operator=(weak_promise const&) = default; - __thrust_exec_check_disable__ + _CCCL_EXEC_CHECK_DISABLE weak_promise& operator=(weak_promise&&) = default; template - __host__ __device__ + _CCCL_HOST_DEVICE void set_value(U&& value) && { *content_ = THRUST_FWD(value); @@ -612,7 +612,7 @@ public: typename X, typename XPointer , typename ComputeContent, typename... Dependencies > - friend __host__ + friend _CCCL_HOST unique_eager_future_promise_pair thrust::system::cuda::detail::make_dependent_future( ComputeContent&& cc, std::tuple&& deps @@ -628,13 +628,13 @@ struct ready_event final ready_event() = default; template - __host__ __device__ + _CCCL_HOST_DEVICE explicit ready_event(ready_future) {} - __host__ __device__ + _CCCL_HOST_DEVICE static constexpr bool valid_content() noexcept { return true; } - __host__ __device__ + _CCCL_HOST_DEVICE static constexpr bool ready() noexcept { return true; } }; @@ -648,7 +648,7 @@ private: value_type value_; public: - __host__ __device__ + _CCCL_HOST_DEVICE ready_future() : value_{} {} ready_future(ready_future&&) = default; @@ -657,30 +657,30 @@ public: ready_future& operator=(ready_future const&) = default; template - __host__ __device__ + _CCCL_HOST_DEVICE explicit ready_future(U&& u) : value_(THRUST_FWD(u)) {} - __host__ __device__ + _CCCL_HOST_DEVICE static constexpr bool valid_content() noexcept { return true; } - __host__ __device__ + _CCCL_HOST_DEVICE static constexpr 
bool ready() noexcept { return true; } - __host__ __device__ + _CCCL_HOST_DEVICE value_type get() const { return value_; } - THRUST_NODISCARD __host__ __device__ - value_type extract() + THRUST_NODISCARD _CCCL_HOST_DEVICE + value_type extract() { return std::move(value_); } #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) // For testing only. - __host__ __device__ + _CCCL_HOST_DEVICE raw_const_pointer data() const { return addressof(value_); @@ -694,7 +694,7 @@ protected: int device_ = 0; std::unique_ptr async_signal_; - __host__ + _CCCL_HOST explicit unique_eager_event( int device_id, std::unique_ptr async_signal ) @@ -702,7 +702,7 @@ protected: {} public: - __host__ + _CCCL_HOST unique_eager_event() : device_(0), async_signal_() {} @@ -715,13 +715,13 @@ public: // Any `unique_eager_future` can be explicitly converted to a // `unique_eager_event`. template - __host__ + _CCCL_HOST explicit unique_eager_event(unique_eager_future&& other) // NOTE: We upcast to `unique_ptr` here. : device_(other.where()), async_signal_(std::move(other.async_signal_)) {} - __host__ + _CCCL_HOST // NOTE: We take `new_stream_t` by `const&` because it is incomplete here. explicit unique_eager_event(new_stream_t const&) : device_(0) @@ -730,7 +730,7 @@ public: thrust::cuda_cub::throw_on_error(cudaGetDevice(&device_)); } - __host__ + _CCCL_HOST virtual ~unique_eager_event() { // FIXME: If we could asynchronously handle destruction of keep alives, we @@ -738,13 +738,13 @@ public: if (valid_stream()) wait(); } - __host__ + _CCCL_HOST bool valid_stream() const noexcept { return bool(async_signal_); } - __host__ + _CCCL_HOST bool ready() const noexcept { if (valid_stream()) @@ -754,7 +754,7 @@ public: } // Precondition: `true == valid_stream()`. 
- __host__ + _CCCL_HOST detail::unique_stream& stream() { if (!valid_stream()) @@ -770,24 +770,24 @@ public: return async_signal_->stream(); } - __host__ + _CCCL_HOST int where() const noexcept { return device_; } // Precondition: `true == valid_stream()`. - __host__ + _CCCL_HOST void wait() { stream().wait(); } - friend __host__ + friend _CCCL_HOST optional thrust::system::cuda::detail::try_acquire_stream( int device_id, unique_eager_event& parent ) noexcept; template - friend __host__ + friend _CCCL_HOST unique_eager_event thrust::system::cuda::detail::make_dependent_event( std::tuple&& deps @@ -809,7 +809,7 @@ private: int device_ = 0; std::unique_ptr> async_signal_; - __host__ + _CCCL_HOST explicit unique_eager_future( int device_id, std::unique_ptr> async_signal ) @@ -817,7 +817,7 @@ private: {} public: - __host__ + _CCCL_HOST unique_eager_future() : device_(0), async_signal_() {} @@ -827,7 +827,7 @@ public: unique_eager_future& operator=(unique_eager_future&&) = default; unique_eager_future& operator=(unique_eager_future const&) = delete; - __host__ + _CCCL_HOST // NOTE: We take `new_stream_t` by `const&` because it is incomplete here. explicit unique_eager_future(new_stream_t const&) : device_(0) @@ -836,7 +836,7 @@ public: thrust::cuda_cub::throw_on_error(cudaGetDevice(&device_)); } - __host__ + _CCCL_HOST ~unique_eager_future() { // FIXME: If we could asynchronously handle destruction of keep alives, we @@ -844,13 +844,13 @@ public: if (valid_stream()) wait(); } - __host__ + _CCCL_HOST bool valid_stream() const noexcept { return bool(async_signal_); } - __host__ + _CCCL_HOST bool valid_content() const noexcept { if (!valid_stream()) @@ -862,7 +862,7 @@ public: } // Precondition: `true == valid_stream()`. - __host__ + _CCCL_HOST bool ready() const noexcept { if (valid_stream()) @@ -872,7 +872,7 @@ public: } // Precondition: `true == valid_stream()`. 
- __host__ + _CCCL_HOST detail::unique_stream& stream() { if (!valid_stream()) @@ -880,7 +880,7 @@ public: return async_signal_->stream(); } - __host__ + _CCCL_HOST detail::unique_stream const& stream() const { if (!valid_stream()) @@ -889,12 +889,12 @@ public: return async_signal_->stream(); } - __host__ + _CCCL_HOST int where() const noexcept { return device_; } // Blocks. // Precondition: `true == valid_stream()`. - __host__ + _CCCL_HOST void wait() { stream().wait(); @@ -902,7 +902,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - __host__ + _CCCL_HOST value_type get() { if (!valid_content()) @@ -913,7 +913,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - THRUST_NODISCARD __host__ + THRUST_NODISCARD _CCCL_HOST value_type extract() { if (!valid_content()) @@ -927,7 +927,7 @@ public: // For testing only. #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) // Precondition: `true == valid_stream()`. - __host__ + _CCCL_HOST raw_const_pointer raw_data() const { if (!valid_stream()) @@ -938,7 +938,7 @@ public: #endif template - friend __host__ + friend _CCCL_HOST optional thrust::system::cuda::detail::try_acquire_stream( int device_id, unique_eager_future& parent @@ -948,7 +948,7 @@ public: typename X, typename XPointer , typename ComputeContent, typename... 
Dependencies > - friend __host__ + friend _CCCL_HOST detail::unique_eager_future_promise_pair thrust::system::cuda::detail::make_dependent_future( ComputeContent&& cc, std::tuple&& deps @@ -962,7 +962,7 @@ public: namespace detail { template -__host__ +_CCCL_HOST optional try_acquire_stream(int, std::unique_ptr&) noexcept { @@ -970,14 +970,14 @@ try_acquire_stream(int, std::unique_ptr&) noexcept return {}; } -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int, unique_stream& stream) noexcept { return {std::move(stream)}; } -inline __host__ +inline _CCCL_HOST optional try_acquire_stream(int, ready_event&) noexcept { @@ -986,7 +986,7 @@ try_acquire_stream(int, ready_event&) noexcept } template -__host__ +_CCCL_HOST optional try_acquire_stream(int, ready_future&) noexcept { @@ -994,7 +994,7 @@ try_acquire_stream(int, ready_future&) noexcept return {}; } -__host__ +_CCCL_HOST optional try_acquire_stream(int device_id, unique_eager_event& parent) noexcept { @@ -1008,7 +1008,7 @@ try_acquire_stream(int device_id, unique_eager_event& parent) noexcept } template -__host__ +_CCCL_HOST optional try_acquire_stream(int device_id, unique_eager_future& parent) noexcept { @@ -1024,7 +1024,7 @@ try_acquire_stream(int device_id, unique_eager_future& parent) noexcept /////////////////////////////////////////////////////////////////////////////// template -__host__ +_CCCL_HOST acquired_stream acquire_stream_impl( int, std::tuple&, index_sequence<> ) noexcept @@ -1035,7 +1035,7 @@ acquired_stream acquire_stream_impl( } template -__host__ +_CCCL_HOST acquired_stream acquire_stream_impl( int device_id , std::tuple& deps, index_sequence @@ -1050,7 +1050,7 @@ acquired_stream acquire_stream_impl( } template -__host__ +_CCCL_HOST acquired_stream acquire_stream( int device_id , std::tuple& deps @@ -1064,26 +1064,26 @@ acquired_stream acquire_stream( /////////////////////////////////////////////////////////////////////////////// template -__host__ +_CCCL_HOST void 
create_dependency( unique_stream&, std::unique_ptr& ) noexcept {} -inline __host__ +inline _CCCL_HOST void create_dependency( unique_stream&, ready_event& ) noexcept {} template -__host__ +_CCCL_HOST void create_dependency( unique_stream&, ready_future& ) noexcept {} -inline __host__ +inline _CCCL_HOST void create_dependency( unique_stream& child, unique_stream& parent ) @@ -1091,7 +1091,7 @@ void create_dependency( child.depend_on(parent); } -inline __host__ +inline _CCCL_HOST void create_dependency( unique_stream& child, unique_eager_event& parent ) @@ -1100,7 +1100,7 @@ void create_dependency( } template -__host__ +_CCCL_HOST void create_dependency( unique_stream& child, unique_eager_future& parent ) @@ -1109,7 +1109,7 @@ void create_dependency( } template -__host__ +_CCCL_HOST void create_dependencies_impl( acquired_stream& , std::tuple&, index_sequence<> @@ -1117,7 +1117,7 @@ void create_dependencies_impl( {} template -__host__ +_CCCL_HOST void create_dependencies_impl( acquired_stream& as , std::tuple& deps, index_sequence @@ -1134,7 +1134,7 @@ void create_dependencies_impl( } template -__host__ +_CCCL_HOST void create_dependencies(acquired_stream& as, std::tuple& deps) { create_dependencies_impl( @@ -1267,7 +1267,7 @@ struct find_keep_alives_impl< /////////////////////////////////////////////////////////////////////////////// template -__host__ +_CCCL_HOST unique_eager_event make_dependent_event(std::tuple&& deps) { int device_id = 0; @@ -1303,7 +1303,7 @@ template < typename X, typename XPointer , typename ComputeContent, typename... Dependencies > -__host__ +_CCCL_HOST unique_eager_future_promise_pair make_dependent_future(ComputeContent&& cc, std::tuple&& deps) { @@ -1345,7 +1345,7 @@ make_dependent_future(ComputeContent&& cc, std::tuple&& deps) /////////////////////////////////////////////////////////////////////////////// template -__host__ +_CCCL_HOST unique_eager_event when_all(Events&&... 
evs) // TODO: Constrain to events, futures, and maybe streams (currently allows keep // alives). @@ -1354,13 +1354,13 @@ unique_eager_event when_all(Events&&... evs) } // ADL hook for transparent `.after` move support. -inline __host__ +inline _CCCL_HOST auto capture_as_dependency(unique_eager_event& dependency) THRUST_DECLTYPE_RETURNS(std::move(dependency)) // ADL hook for transparent `.after` move support. template -__host__ +_CCCL_HOST auto capture_as_dependency(unique_eager_future& dependency) THRUST_DECLTYPE_RETURNS(std::move(dependency)) diff --git a/thrust/system/cuda/detail/gather.h b/thrust/system/cuda/detail/gather.h index 56ff3aecf..b2a130362 100644 --- a/thrust/system/cuda/detail/gather.h +++ b/thrust/system/cuda/detail/gather.h @@ -39,7 +39,7 @@ template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE gather(execution_policy& policy, MapIt map_first, MapIt map_last, @@ -60,7 +60,7 @@ template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE gather_if(execution_policy& policy, MapIt map_first, MapIt map_last, @@ -83,7 +83,7 @@ template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE gather_if(execution_policy& policy, MapIt map_first, MapIt map_last, diff --git a/thrust/system/cuda/detail/generate.h b/thrust/system/cuda/detail/generate.h index ad6340f83..5d515f73c 100644 --- a/thrust/system/cuda/detail/generate.h +++ b/thrust/system/cuda/detail/generate.h @@ -60,7 +60,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE generate_n(execution_policy &policy, OutputIt result, Size count, @@ -76,7 +76,7 @@ generate_n(execution_policy &policy, template -void __host__ __device__ +void _CCCL_HOST_DEVICE generate(execution_policy &policy, OutputIt first, OutputIt last, diff --git a/thrust/system/cuda/detail/get_value.h b/thrust/system/cuda/detail/get_value.h index 9065f773a..bb96d245b 100644 --- a/thrust/system/cuda/detail/get_value.h +++ b/thrust/system/cuda/detail/get_value.h @@ -35,7 +35,7 @@ namespace template 
-inline __host__ __device__ +inline _CCCL_HOST_DEVICE typename thrust::iterator_value::type get_value_msvc2005_war(execution_policy &exec, Pointer ptr) { @@ -44,7 +44,7 @@ inline __host__ __device__ // XXX war nvbugs/881631 struct war_nvbugs_881631 { - __host__ inline static result_type host_path(execution_policy &exec, Pointer ptr) + _CCCL_HOST inline static result_type host_path(execution_policy &exec, Pointer ptr) { // when called from host code, implement with assign_value // note that this requires a type with default constructor @@ -57,7 +57,7 @@ inline __host__ __device__ return result; } - __device__ inline static result_type device_path(execution_policy &, Pointer ptr) + _CCCL_DEVICE inline static result_type device_path(execution_policy &, Pointer ptr) { // when called from device code, just do simple deref return *thrust::raw_pointer_cast(ptr); @@ -72,7 +72,7 @@ inline __host__ __device__ template -inline __host__ __device__ +inline _CCCL_HOST_DEVICE typename thrust::iterator_value::type get_value(execution_policy &exec, Pointer ptr) { diff --git a/thrust/system/cuda/detail/guarded_cuda_runtime_api.h b/thrust/system/cuda/detail/guarded_cuda_runtime_api.h deleted file mode 100644 index 5b0f345a7..000000000 --- a/thrust/system/cuda/detail/guarded_cuda_runtime_api.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2008-2013 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// the purpose of this header is to check for the existence of macros -// such as __host__ and __device__, which may already be defined by thrust -// and to undefine them before entering cuda_runtime_api.h (which will redefine them) - -// we only try to do this stuff if cuda/include/host_defines.h has been included -#if !defined(__HOST_DEFINES_H__) - -#ifdef __host__ -#undef __host__ -#endif // __host__ - -#ifdef __device__ -#undef __device__ -#endif // __device__ - -#endif // __HOST_DEFINES_H__ - -#include - diff --git a/thrust/system/cuda/detail/guarded_driver_types.h b/thrust/system/cuda/detail/guarded_driver_types.h deleted file mode 100644 index 076964071..000000000 --- a/thrust/system/cuda/detail/guarded_driver_types.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2008-2013 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include without causing -// warnings from redefinitions of __host__ and __device__. 
-// carefully save their definitions and restore them -// can't tell exactly when push_macro & pop_macro were introduced to gcc; assume 4.5.0 - - -#if !defined(__GNUC__) || ((10000 * __GNUC__ + 100 * __GNUC_MINOR__ + __GNUC_PATCHLEVEL__) >= 40500) -# ifdef __host__ -# pragma push_macro("__host__") -# undef __host__ -# define THRUST_HOST_NEEDS_RESTORATION -# endif -# ifdef __device__ -# pragma push_macro("__device__") -# undef __device__ -# define THRUST_DEVICE_NEEDS_RESTORATION -# endif -#else // GNUC pre 4.5.0 -# if !defined(__DRIVER_TYPES_H__) -# ifdef __host__ -# undef __host__ -# endif -# ifdef __device__ -# undef __device__ -# endif -# endif // __DRIVER_TYPES_H__ -#endif // __GNUC__ - - -#include - - -#if !defined(__GNUC__) || ((10000 * __GNUC__ + 100 * __GNUC_MINOR__ + __GNUC_PATCHLEVEL__) >= 40500) -# ifdef THRUST_HOST_NEEDS_RESTORATION -# pragma pop_macro("__host__") -# undef THRUST_HOST_NEEDS_RESTORATION -# endif -# ifdef THRUST_DEVICE_NEEDS_RESTORATION -# pragma pop_macro("__device__") -# undef THRUST_DEVICE_NEEDS_RESTORATION -# endif -#endif // __GNUC__ - diff --git a/thrust/system/cuda/detail/inner_product.h b/thrust/system/cuda/detail/inner_product.h index 98e9064d2..6d4027bf5 100644 --- a/thrust/system/cuda/detail/inner_product.h +++ b/thrust/system/cuda/detail/inner_product.h @@ -44,7 +44,7 @@ template -T __host__ __device__ +T _CCCL_HOST_DEVICE inner_product(execution_policy &policy, InputIt1 first1, InputIt1 last1, @@ -72,7 +72,7 @@ template -T __host__ __device__ +T _CCCL_HOST_DEVICE inner_product(execution_policy &policy, InputIt1 first1, InputIt1 last1, diff --git a/thrust/system/cuda/detail/internal/copy_cross_system.h b/thrust/system/cuda/detail/internal/copy_cross_system.h index a1208c67c..cdb5e3736 100644 --- a/thrust/system/cuda/detail/internal/copy_cross_system.h +++ b/thrust/system/cuda/detail/internal/copy_cross_system.h @@ -91,7 +91,7 @@ namespace __copy { class InputIt, class Size, class OutputIt> - OutputIt __host__ + OutputIt 
_CCCL_HOST cross_system_copy_n(thrust::execution_policy& sys1, thrust::execution_policy& sys2, InputIt begin, @@ -118,7 +118,7 @@ namespace __copy { class InputIt, class Size, class OutputIt> - OutputIt __host__ + OutputIt _CCCL_HOST cross_system_copy_n(thrust::cpp::execution_policy& host_s, thrust::cuda_cub::execution_policy& device_s, InputIt first, @@ -166,7 +166,7 @@ namespace __copy { class InputIt, class Size, class OutputIt> - OutputIt __host__ + OutputIt _CCCL_HOST cross_system_copy_n(thrust::cuda_cub::execution_policy& device_s, thrust::cpp::execution_policy& host_s, InputIt first, @@ -207,7 +207,7 @@ namespace __copy { class InputIt, class Size, class OutputIt> - OutputIt __host__ + OutputIt _CCCL_HOST cross_system_copy_n(cross_system systems, InputIt begin, Size n, @@ -226,7 +226,7 @@ namespace __copy { class System2, class InputIterator, class OutputIterator> - OutputIterator __host__ + OutputIterator _CCCL_HOST cross_system_copy(cross_system systems, InputIterator begin, InputIterator end, diff --git a/thrust/system/cuda/detail/iter_swap.h b/thrust/system/cuda/detail/iter_swap.h index c0628610a..71b5eed17 100644 --- a/thrust/system/cuda/detail/iter_swap.h +++ b/thrust/system/cuda/detail/iter_swap.h @@ -33,18 +33,18 @@ namespace cuda_cub { template -inline __host__ __device__ +inline _CCCL_HOST_DEVICE void iter_swap(thrust::cuda::execution_policy &, Pointer1 a, Pointer2 b) { // XXX war nvbugs/881631 struct war_nvbugs_881631 { - __host__ inline static void host_path(Pointer1 a, Pointer2 b) + _CCCL_HOST inline static void host_path(Pointer1 a, Pointer2 b) { thrust::swap_ranges(a, a + 1, b); } - __device__ inline static void device_path(Pointer1 a, Pointer2 b) + _CCCL_DEVICE inline static void device_path(Pointer1 a, Pointer2 b) { using thrust::swap; swap(*thrust::raw_pointer_cast(a), diff --git a/thrust/system/cuda/detail/malloc_and_free.h b/thrust/system/cuda/detail/malloc_and_free.h index 1b12e2cc3..2b505a4fc 100644 --- 
a/thrust/system/cuda/detail/malloc_and_free.h +++ b/thrust/system/cuda/detail/malloc_and_free.h @@ -16,7 +16,16 @@ #pragma once -#include +#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header + #include #include @@ -51,7 +60,7 @@ inline cub::CachingDeviceAllocator &get_allocator() // note that malloc returns a raw pointer to avoid // depending on the heavyweight thrust/system/cuda/memory.h header template -__host__ __device__ +_CCCL_HOST_DEVICE void *malloc(execution_policy &, std::size_t n) { void *result = 0; @@ -91,7 +100,7 @@ void *malloc(execution_policy &, std::size_t n) template -__host__ __device__ +_CCCL_HOST_DEVICE void free(execution_policy &, Pointer ptr) { // need to repeat a lot of code here because we can't use #if inside of the diff --git a/thrust/system/cuda/detail/memory.inl b/thrust/system/cuda/detail/memory.inl index f6fc98359..f096febbc 100644 --- a/thrust/system/cuda/detail/memory.inl +++ b/thrust/system/cuda/detail/memory.inl @@ -25,7 +25,7 @@ THRUST_NAMESPACE_BEGIN namespace cuda_cub { -__host__ __device__ +_CCCL_HOST_DEVICE pointer malloc(std::size_t n) { tag cuda_tag; @@ -33,14 +33,14 @@ pointer malloc(std::size_t n) } // end malloc() template -__host__ __device__ +_CCCL_HOST_DEVICE pointer malloc(std::size_t n) { pointer raw_ptr = thrust::cuda_cub::malloc(sizeof(T) * n); return pointer(reinterpret_cast(raw_ptr.get())); } // end malloc() -__host__ __device__ +_CCCL_HOST_DEVICE void free(pointer ptr) { tag cuda_tag; diff --git a/thrust/system/cuda/detail/merge.h b/thrust/system/cuda/detail/merge.h index 478e3508d..81f3c6c00 100644 --- a/thrust/system/cuda/detail/merge.h +++ b/thrust/system/cuda/detail/merge.h @@ -856,13 +856,13 @@ namespace __merge { //------------------------- -__thrust_exec_check_disable__ 
+_CCCL_EXEC_CHECK_DISABLE template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE merge(execution_policy& policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -897,7 +897,7 @@ merge(execution_policy& policy, } template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE merge(execution_policy& policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -915,7 +915,7 @@ merge(execution_policy& policy, less()); } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE merge_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -968,7 +968,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE merge_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, diff --git a/thrust/system/cuda/detail/mismatch.h b/thrust/system/cuda/detail/mismatch.h index b1e2f44d2..808ea6993 100644 --- a/thrust/system/cuda/detail/mismatch.h +++ b/thrust/system/cuda/detail/mismatch.h @@ -41,7 +41,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE mismatch(execution_policy& policy, InputIt1 first1, InputIt1 last1, @@ -51,7 +51,7 @@ mismatch(execution_policy& policy, template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE mismatch(execution_policy& policy, InputIt1 first1, InputIt1 last1, @@ -68,7 +68,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE mismatch(execution_policy& policy, InputIt1 first1, InputIt1 last1, @@ -95,7 +95,7 @@ mismatch(execution_policy& policy, template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE mismatch(execution_policy& policy, InputIt1 first1, InputIt1 last1, diff --git a/thrust/system/cuda/detail/par.h b/thrust/system/cuda/detail/par.h index 42c701ca7..4b503c3b1 100644 --- a/thrust/system/cuda/detail/par.h +++ b/thrust/system/cuda/detail/par.h @@ -27,7 +27,14 @@ #pragma once #include -#include + +#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) +# pragma GCC system_header +#elif 
defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) +# pragma clang system_header +#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) +# pragma system_header +#endif // no system header #include #include @@ -48,8 +55,8 @@ struct execute_on_stream_base : execution_policy cudaStream_t stream; public: - __thrust_exec_check_disable__ - __host__ __device__ + _CCCL_EXEC_CHECK_DISABLE + _CCCL_HOST_DEVICE execute_on_stream_base(cudaStream_t stream_ = default_stream()) : stream(stream_){} @@ -63,7 +70,7 @@ struct execute_on_stream_base : execution_policy } private: - friend __host__ __device__ + friend _CCCL_HOST_DEVICE cudaStream_t get_stream(const execute_on_stream_base &exec) { @@ -78,7 +85,7 @@ struct execute_on_stream_nosync_base : execution_policy cudaStream_t stream; public: - __host__ __device__ + _CCCL_HOST_DEVICE execute_on_stream_nosync_base(cudaStream_t stream_ = default_stream()) : stream(stream_){} @@ -92,14 +99,14 @@ struct execute_on_stream_nosync_base : execution_policy } private: - friend __host__ __device__ + friend _CCCL_HOST_DEVICE cudaStream_t get_stream(const execute_on_stream_nosync_base &exec) { return exec.stream; } - friend __host__ __device__ + friend _CCCL_HOST_DEVICE bool must_perform_optional_stream_synchronization(const execute_on_stream_nosync_base &) { @@ -111,10 +118,10 @@ struct execute_on_stream : execute_on_stream_base { typedef execute_on_stream_base base_t; - __host__ __device__ + _CCCL_HOST_DEVICE execute_on_stream() : base_t(){}; - __host__ __device__ - execute_on_stream(cudaStream_t stream) + _CCCL_HOST_DEVICE + execute_on_stream(cudaStream_t stream) : base_t(stream){}; }; @@ -122,10 +129,10 @@ struct execute_on_stream_nosync : execute_on_stream_nosync_base base_t; - __host__ __device__ + _CCCL_HOST_DEVICE execute_on_stream_nosync() : base_t(){}; - __host__ __device__ - execute_on_stream_nosync(cudaStream_t stream) + _CCCL_HOST_DEVICE + execute_on_stream_nosync(cudaStream_t stream) : base_t(stream){}; }; @@ -140,7 +147,7 @@ struct par_t : 
execution_policy, { typedef execution_policy base_t; - __host__ __device__ + _CCCL_HOST_DEVICE constexpr par_t() : base_t() {} typedef execute_on_stream stream_attachment_type; @@ -163,7 +170,7 @@ struct par_nosync_t : execution_policy, { typedef execution_policy base_t; - __host__ __device__ + _CCCL_HOST_DEVICE constexpr par_nosync_t() : base_t() {} typedef execute_on_stream_nosync stream_attachment_type; @@ -178,7 +185,7 @@ struct par_nosync_t : execution_policy, private: //this function is defined to allow non-blocking calls on the default_stream() with thrust::cuda::par_nosync //without explicitly using thrust::cuda::par_nosync.on(default_stream()) - friend __host__ __device__ + friend _CCCL_HOST_DEVICE bool must_perform_optional_stream_synchronization(const par_nosync_t &) { diff --git a/thrust/system/cuda/detail/par_to_seq.h b/thrust/system/cuda/detail/par_to_seq.h index e710f017b..983f935f8 100644 --- a/thrust/system/cuda/detail/par_to_seq.h +++ b/thrust/system/cuda/detail/par_to_seq.h @@ -45,7 +45,7 @@ struct cvt_to_seq_impl { typedef thrust::detail::seq_t seq_t; - static seq_t __host__ __device__ + static seq_t _CCCL_HOST_DEVICE doit(Policy&) { return seq_t(); @@ -67,7 +67,7 @@ struct cvt_to_seq_impl< seq_t; - static seq_t __host__ __device__ + static seq_t _CCCL_HOST_DEVICE doit(Policy& policy) { return seq_t(policy.m_alloc); @@ -76,7 +76,7 @@ struct cvt_to_seq_impl< #endif template -typename cvt_to_seq_impl::seq_t __host__ __device__ +typename cvt_to_seq_impl::seq_t _CCCL_HOST_DEVICE cvt_to_seq(Policy& policy) { return cvt_to_seq_impl::doit(policy); diff --git a/thrust/system/cuda/detail/parallel_for.h b/thrust/system/cuda/detail/parallel_for.h index 43c3297aa..f7e0ecf9f 100644 --- a/thrust/system/cuda/detail/parallel_for.h +++ b/thrust/system/cuda/detail/parallel_for.h @@ -143,11 +143,11 @@ namespace __parallel_for { } } // __parallel_for -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -void __host__ __device__ +void _CCCL_HOST_DEVICE 
parallel_for(execution_policy &policy, F f, Size count) diff --git a/thrust/system/cuda/detail/partition.h b/thrust/system/cuda/detail/partition.h index fad75eb0d..213a11799 100644 --- a/thrust/system/cuda/detail/partition.h +++ b/thrust/system/cuda/detail/partition.h @@ -830,14 +830,14 @@ namespace __partition { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE partition_copy(execution_policy &policy, InputIt first, InputIt last, @@ -865,13 +865,13 @@ partition_copy(execution_policy &policy, return ret; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE partition_copy(execution_policy &policy, InputIt first, InputIt last, @@ -897,13 +897,13 @@ partition_copy(execution_policy &policy, return ret; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE stable_partition_copy(execution_policy &policy, InputIt first, InputIt last, @@ -929,14 +929,14 @@ stable_partition_copy(execution_policy &policy, return ret; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE stable_partition_copy(execution_policy &policy, InputIt first, InputIt last, @@ -966,12 +966,12 @@ stable_partition_copy(execution_policy &policy, /// inplace -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -Iterator __host__ __device__ +Iterator _CCCL_HOST_DEVICE partition(execution_policy &policy, Iterator first, Iterator last, @@ -989,11 +989,11 @@ partition(execution_policy &policy, return last; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -Iterator __host__ __device__ +Iterator _CCCL_HOST_DEVICE partition(execution_policy &policy, Iterator first, Iterator last, @@ -1012,12 +1012,12 @@ partition(execution_policy &policy, return last; } 
-__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -Iterator __host__ __device__ +Iterator _CCCL_HOST_DEVICE stable_partition(execution_policy &policy, Iterator first, Iterator last, @@ -1040,11 +1040,11 @@ stable_partition(execution_policy &policy, return ret; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -Iterator __host__ __device__ +Iterator _CCCL_HOST_DEVICE stable_partition(execution_policy &policy, Iterator first, Iterator last, @@ -1071,7 +1071,7 @@ stable_partition(execution_policy &policy, template -bool __host__ __device__ +bool _CCCL_HOST_DEVICE is_partitioned(execution_policy &policy, ItemsIt first, ItemsIt last, diff --git a/thrust/system/cuda/detail/per_device_resource.h b/thrust/system/cuda/detail/per_device_resource.h index 414ea7788..dcafdc0ad 100644 --- a/thrust/system/cuda/detail/per_device_resource.h +++ b/thrust/system/cuda/detail/per_device_resource.h @@ -49,7 +49,7 @@ namespace cuda_cub { template -__host__ +_CCCL_HOST MR * get_per_device_resource(execution_policy&) { static std::mutex map_lock; diff --git a/thrust/system/cuda/detail/reduce.h b/thrust/system/cuda/detail/reduce.h index 41d9075da..f2be638d3 100644 --- a/thrust/system/cuda/detail/reduce.h +++ b/thrust/system/cuda/detail/reduce.h @@ -58,7 +58,7 @@ template -T __host__ __device__ +T _CCCL_HOST_DEVICE reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -997,13 +997,13 @@ T reduce_n_impl(execution_policy& policy, // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE T reduce_n(execution_policy& policy, InputIt first, Size num_items, @@ -1025,7 +1025,7 @@ T reduce_n(execution_policy& policy, } template -__host__ __device__ +_CCCL_HOST_DEVICE T reduce(execution_policy &policy, InputIt first, InputIt last, @@ -1041,7 +1041,7 @@ T reduce(execution_policy &policy, template -__host__ __device__ 
+_CCCL_HOST_DEVICE T reduce(execution_policy &policy, InputIt first, InputIt last, @@ -1052,7 +1052,7 @@ T reduce(execution_policy &policy, template -__host__ __device__ +_CCCL_HOST_DEVICE typename iterator_traits::value_type reduce(execution_policy &policy, InputIt first, diff --git a/thrust/system/cuda/detail/reduce_by_key.h b/thrust/system/cuda/detail/reduce_by_key.h index 2933d062a..c17c2a312 100644 --- a/thrust/system/cuda/detail/reduce_by_key.h +++ b/thrust/system/cuda/detail/reduce_by_key.h @@ -59,7 +59,7 @@ template -__host__ __device__ thrust::pair +_CCCL_HOST_DEVICE thrust::pair reduce_by_key( const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -1107,7 +1107,7 @@ namespace __reduce_by_key { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE reduce_by_key(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -1152,7 +1152,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE reduce_by_key(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -1181,7 +1181,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE reduce_by_key(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, diff --git a/thrust/system/cuda/detail/remove.h b/thrust/system/cuda/detail/remove.h index 836d8f5ea..a52dc187a 100644 --- a/thrust/system/cuda/detail/remove.h +++ b/thrust/system/cuda/detail/remove.h @@ -40,7 +40,7 @@ template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE remove_if(execution_policy &policy, InputIt first, InputIt last, @@ -54,7 +54,7 @@ remove_if(execution_policy &policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE remove_if(execution_policy &policy, InputIt first, InputIt last, @@ -68,7 +68,7 @@ remove_if(execution_policy &policy, template -InputIt __host__ __device__ +InputIt _CCCL_HOST_DEVICE 
remove(execution_policy &policy, InputIt first, InputIt last, @@ -86,7 +86,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE remove_copy_if(execution_policy &policy, InputIt first, InputIt last, @@ -102,7 +102,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE remove_copy_if(execution_policy &policy, InputIt first, InputIt last, @@ -118,7 +118,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE remove_copy(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/replace.h b/thrust/system/cuda/detail/replace.h index af8b8fa95..4ea94ba91 100644 --- a/thrust/system/cuda/detail/replace.h +++ b/thrust/system/cuda/detail/replace.h @@ -83,7 +83,7 @@ namespace cuda_cub { template -void __host__ __device__ +void _CCCL_HOST_DEVICE replace(execution_policy &policy, Iterator first, Iterator last, @@ -104,7 +104,7 @@ template -void __host__ __device__ +void _CCCL_HOST_DEVICE replace_if(execution_policy &policy, Iterator first, Iterator last, @@ -124,7 +124,7 @@ template -void __host__ __device__ +void _CCCL_HOST_DEVICE replace_if(execution_policy &policy, Iterator first, Iterator last, @@ -146,7 +146,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE replace_copy_if(execution_policy &policy, InputIt first, InputIt last, @@ -169,7 +169,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE replace_copy_if(execution_policy &policy, InputIt first, InputIt last, @@ -192,7 +192,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE replace_copy(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/reverse.h b/thrust/system/cuda/detail/reverse.h index 7c4cb867e..b0a85e08e 100644 --- a/thrust/system/cuda/detail/reverse.h +++ b/thrust/system/cuda/detail/reverse.h @@ -35,14 +35,14 @@ THRUST_NAMESPACE_BEGIN namespace cuda_cub { template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE 
reverse_copy(execution_policy &policy, ItemsIt first, ItemsIt last, ResultIt result); template -void __host__ __device__ +void _CCCL_HOST_DEVICE reverse(execution_policy &policy, ItemsIt first, ItemsIt last); @@ -62,7 +62,7 @@ namespace cuda_cub { template -ResultIt __host__ __device__ +ResultIt _CCCL_HOST_DEVICE reverse_copy(execution_policy &policy, ItemsIt first, ItemsIt last, @@ -76,7 +76,7 @@ reverse_copy(execution_policy &policy, template -void __host__ __device__ +void _CCCL_HOST_DEVICE reverse(execution_policy &policy, ItemsIt first, ItemsIt last) diff --git a/thrust/system/cuda/detail/scan.h b/thrust/system/cuda/detail/scan.h index 7367c5293..1ce1f61fc 100644 --- a/thrust/system/cuda/detail/scan.h +++ b/thrust/system/cuda/detail/scan.h @@ -46,13 +46,13 @@ namespace cuda_cub namespace detail { -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt inclusive_scan_n_impl(thrust::cuda_cub::execution_policy &policy, InputIt first, Size num_items, @@ -123,14 +123,14 @@ OutputIt inclusive_scan_n_impl(thrust::cuda_cub::execution_policy &poli return result + num_items; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt exclusive_scan_n_impl(thrust::cuda_cub::execution_policy &policy, InputIt first, Size num_items, @@ -208,13 +208,13 @@ OutputIt exclusive_scan_n_impl(thrust::cuda_cub::execution_policy &poli // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt inclusive_scan_n(thrust::cuda_cub::execution_policy &policy, InputIt first, Size num_items, @@ -236,7 +236,7 @@ OutputIt inclusive_scan_n(thrust::cuda_cub::execution_policy &policy, } template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt inclusive_scan(thrust::cuda_cub::execution_policy &policy, InputIt first, InputIt last, @@ -253,7 +253,7 @@ OutputIt 
inclusive_scan(thrust::cuda_cub::execution_policy &policy, } template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt inclusive_scan(thrust::cuda_cub::execution_policy &policy, InputIt first, InputIt last, @@ -266,14 +266,14 @@ OutputIt inclusive_scan(thrust::cuda_cub::execution_policy &policy, thrust::plus<>{}); } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt exclusive_scan_n(thrust::cuda_cub::execution_policy &policy, InputIt first, Size num_items, @@ -302,7 +302,7 @@ template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt exclusive_scan(thrust::cuda_cub::execution_policy &policy, InputIt first, InputIt last, @@ -321,7 +321,7 @@ OutputIt exclusive_scan(thrust::cuda_cub::execution_policy &policy, } template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt exclusive_scan(thrust::cuda_cub::execution_policy &policy, InputIt first, InputIt last, @@ -337,7 +337,7 @@ OutputIt exclusive_scan(thrust::cuda_cub::execution_policy &policy, } template -__host__ __device__ +_CCCL_HOST_DEVICE OutputIt exclusive_scan(thrust::cuda_cub::execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h index 0407779c6..bbcfd8e33 100644 --- a/thrust/system/cuda/detail/scan_by_key.h +++ b/thrust/system/cuda/detail/scan_by_key.h @@ -57,7 +57,7 @@ namespace cuda_cub namespace detail { -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE ValuesOutIt inclusive_scan_by_key_n( thrust::cuda_cub::execution_policy& policy, KeysInIt keys, @@ -168,7 +168,7 @@ ValuesOutIt inclusive_scan_by_key_n( return result + num_items; } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE ValuesOutIt exclusive_scan_by_key_n( thrust::cuda_cub::execution_policy& policy, KeysInIt keys, @@ -292,14 +292,14 @@ ValuesOutIt exclusive_scan_by_key_n( // 
Inclusive scan //--------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE inclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -334,7 +334,7 @@ template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE inclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -355,7 +355,7 @@ template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE inclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -375,7 +375,7 @@ inclusive_scan_by_key(execution_policy &policy, // Exclusive scan //--------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -421,7 +421,7 @@ template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -445,7 +445,7 @@ template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, @@ -467,7 +467,7 @@ template -ValOutputIt __host__ __device__ +ValOutputIt _CCCL_HOST_DEVICE exclusive_scan_by_key(execution_policy &policy, KeyInputIt key_first, KeyInputIt key_last, diff --git a/thrust/system/cuda/detail/scatter.h b/thrust/system/cuda/detail/scatter.h index e297d782d..a347fc218 100644 --- a/thrust/system/cuda/detail/scatter.h +++ b/thrust/system/cuda/detail/scatter.h @@ -39,7 +39,7 @@ template -void __host__ __device__ +void _CCCL_HOST_DEVICE scatter(execution_policy& policy, ItemsIt first, ItemsIt last, @@ -59,7 +59,7 @@ template -void __host__ __device__ +void _CCCL_HOST_DEVICE scatter_if(execution_policy& policy, ItemsIt first, 
ItemsIt last, @@ -83,7 +83,7 @@ template -void __host__ __device__ +void _CCCL_HOST_DEVICE scatter_if(execution_policy& policy, ItemsIt first, ItemsIt last, diff --git a/thrust/system/cuda/detail/set_operations.h b/thrust/system/cuda/detail/set_operations.h index 98bb4bb5d..a8641596c 100644 --- a/thrust/system/cuda/detail/set_operations.h +++ b/thrust/system/cuda/detail/set_operations.h @@ -1346,13 +1346,13 @@ namespace __set_operations { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_difference(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1391,7 +1391,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_difference(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1412,13 +1412,13 @@ set_difference(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_intersection(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1457,7 +1457,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_intersection(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1478,13 +1478,13 @@ set_intersection(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_symmetric_difference(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1523,7 +1523,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_symmetric_difference(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1543,13 +1543,13 @@ set_symmetric_difference(execution_policy &policy, /*****************************/ 
-__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_union(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1588,7 +1588,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE set_union(execution_policy &policy, ItemsIt1 items1_first, ItemsIt1 items1_last, @@ -1615,7 +1615,7 @@ set_union(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_difference_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1670,7 +1670,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_difference_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1696,7 +1696,7 @@ set_difference_by_key(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_intersection_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1749,7 +1749,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_intersection_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1773,7 +1773,7 @@ set_intersection_by_key(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_symmetric_difference_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1829,7 +1829,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_symmetric_difference_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1855,7 +1855,7 @@ set_symmetric_difference_by_key(execution_policy &policy, /*****************************/ -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template 
-pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_union_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -1910,7 +1910,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE set_union_by_key(execution_policy &policy, KeysIt1 keys1_first, KeysIt1 keys1_last, diff --git a/thrust/system/cuda/detail/sort.h b/thrust/system/cuda/detail/sort.h index 993418615..4ba062263 100644 --- a/thrust/system/cuda/detail/sort.h +++ b/thrust/system/cuda/detail/sort.h @@ -490,9 +490,9 @@ namespace __smart_sort { //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -void __host__ __device__ +void _CCCL_HOST_DEVICE sort(execution_policy& policy, ItemsIt first, ItemsIt last, @@ -509,9 +509,9 @@ sort(execution_policy& policy, (thrust::sort(cvt_to_seq(derived_cast(policy)), first, last, compare_op);)); } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -void __host__ __device__ +void _CCCL_HOST_DEVICE stable_sort(execution_policy& policy, ItemsIt first, ItemsIt last, @@ -531,9 +531,9 @@ stable_sort(execution_policy& policy, compare_op);)); } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -void __host__ __device__ +void _CCCL_HOST_DEVICE sort_by_key(execution_policy& policy, KeysIt keys_first, KeysIt keys_last, @@ -554,12 +554,12 @@ sort_by_key(execution_policy& policy, compare_op);)); } -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -void __host__ __device__ +void _CCCL_HOST_DEVICE stable_sort_by_key(execution_policy &policy, KeysIt keys_first, KeysIt keys_last, @@ -583,7 +583,7 @@ stable_sort_by_key(execution_policy &policy, // API with default comparator template -void __host__ __device__ +void _CCCL_HOST_DEVICE sort(execution_policy& policy, ItemsIt first, ItemsIt last) @@ -593,7 +593,7 @@ sort(execution_policy& policy, } template -void __host__ __device__ +void _CCCL_HOST_DEVICE stable_sort(execution_policy& policy, ItemsIt first, ItemsIt last) @@ 
-603,7 +603,7 @@ stable_sort(execution_policy& policy, } template -void __host__ __device__ +void _CCCL_HOST_DEVICE sort_by_key(execution_policy& policy, KeysIt keys_first, KeysIt keys_last, @@ -614,7 +614,7 @@ sort_by_key(execution_policy& policy, } template -void __host__ __device__ +void _CCCL_HOST_DEVICE stable_sort_by_key( execution_policy& policy, KeysIt keys_first, KeysIt keys_last, ValuesIt values) { diff --git a/thrust/system/cuda/detail/swap_ranges.h b/thrust/system/cuda/detail/swap_ranges.h index 8f9e4fa8a..aaa837ca3 100644 --- a/thrust/system/cuda/detail/swap_ranges.h +++ b/thrust/system/cuda/detail/swap_ranges.h @@ -77,7 +77,7 @@ namespace __swap_ranges { template -ItemsIt2 __host__ __device__ +ItemsIt2 _CCCL_HOST_DEVICE swap_ranges(execution_policy &policy, ItemsIt1 first1, ItemsIt1 last1, diff --git a/thrust/system/cuda/detail/tabulate.h b/thrust/system/cuda/detail/tabulate.h index 67edb8574..af6b30e13 100644 --- a/thrust/system/cuda/detail/tabulate.h +++ b/thrust/system/cuda/detail/tabulate.h @@ -46,11 +46,11 @@ namespace __tabulate { Iterator items; TabulateOp op; - __host__ __device__ + _CCCL_HOST_DEVICE functor(Iterator items_, TabulateOp op_) : items(items_), op(op_) {} - void __device__ operator()(Size idx) + void _CCCL_DEVICE operator()(Size idx) { items[idx] = op(idx); } @@ -61,7 +61,7 @@ namespace __tabulate { template -void __host__ __device__ +void _CCCL_HOST_DEVICE tabulate(execution_policy& policy, Iterator first, Iterator last, diff --git a/thrust/system/cuda/detail/terminate.h b/thrust/system/cuda/detail/terminate.h index 226c9d5ac..b0ebc76a6 100644 --- a/thrust/system/cuda/detail/terminate.h +++ b/thrust/system/cuda/detail/terminate.h @@ -40,14 +40,14 @@ namespace detail { -inline __device__ +inline _CCCL_DEVICE void terminate() { thrust::cuda_cub::terminate(); } -inline __host__ __device__ +inline _CCCL_HOST_DEVICE void terminate_with_message(const char* message) { printf("%s\n", message); diff --git 
a/thrust/system/cuda/detail/transform_reduce.h b/thrust/system/cuda/detail/transform_reduce.h index 60efaae5a..5df5d36e7 100644 --- a/thrust/system/cuda/detail/transform_reduce.h +++ b/thrust/system/cuda/detail/transform_reduce.h @@ -41,7 +41,7 @@ template -T __host__ __device__ +T _CCCL_HOST_DEVICE transform_reduce(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/transform_scan.h b/thrust/system/cuda/detail/transform_scan.h index 8f14ca8f7..7c42c3ed2 100644 --- a/thrust/system/cuda/detail/transform_scan.h +++ b/thrust/system/cuda/detail/transform_scan.h @@ -43,7 +43,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE transform_inclusive_scan(execution_policy &policy, InputIt first, InputIt last, @@ -76,7 +76,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE transform_exclusive_scan(execution_policy &policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/uninitialized_copy.h b/thrust/system/cuda/detail/uninitialized_copy.h index f21b7c0d6..f997a289a 100644 --- a/thrust/system/cuda/detail/uninitialized_copy.h +++ b/thrust/system/cuda/detail/uninitialized_copy.h @@ -75,7 +75,7 @@ template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE uninitialized_copy_n(execution_policy &policy, InputIt first, Size count, @@ -93,7 +93,7 @@ uninitialized_copy_n(execution_policy &policy, template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE uninitialized_copy(execution_policy& policy, InputIt first, InputIt last, diff --git a/thrust/system/cuda/detail/uninitialized_fill.h b/thrust/system/cuda/detail/uninitialized_fill.h index 96b970201..0395ba50e 100644 --- a/thrust/system/cuda/detail/uninitialized_fill.h +++ b/thrust/system/cuda/detail/uninitialized_fill.h @@ -73,7 +73,7 @@ template -Iterator __host__ __device__ +Iterator _CCCL_HOST_DEVICE uninitialized_fill_n(execution_policy& policy, Iterator first, Size count, @@ -91,7 +91,7 @@ 
uninitialized_fill_n(execution_policy& policy, template -void __host__ __device__ +void _CCCL_HOST_DEVICE uninitialized_fill(execution_policy& policy, Iterator first, Iterator last, diff --git a/thrust/system/cuda/detail/unique.h b/thrust/system/cuda/detail/unique.h index 653ffa79a..25867fd6f 100644 --- a/thrust/system/cuda/detail/unique.h +++ b/thrust/system/cuda/detail/unique.h @@ -50,7 +50,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ ForwardIterator +_CCCL_HOST_DEVICE ForwardIterator unique( const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -61,7 +61,7 @@ template -__host__ __device__ OutputIterator +_CCCL_HOST_DEVICE OutputIterator unique_copy( const thrust::detail::execution_policy_base &exec, InputIterator first, @@ -72,7 +72,7 @@ unique_copy( template -__host__ __device__ typename thrust::iterator_traits::difference_type +_CCCL_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count( const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -713,12 +713,12 @@ namespace __unique { // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE unique_copy(execution_policy &policy, InputIt first, InputIt last, @@ -738,7 +738,7 @@ unique_copy(execution_policy &policy, template -OutputIt __host__ __device__ +OutputIt _CCCL_HOST_DEVICE unique_copy(execution_policy &policy, InputIt first, InputIt last, @@ -750,11 +750,11 @@ unique_copy(execution_policy &policy, -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -ForwardIt __host__ __device__ +ForwardIt _CCCL_HOST_DEVICE unique(execution_policy &policy, ForwardIt first, ForwardIt last, @@ -772,7 +772,7 @@ unique(execution_policy &policy, template -ForwardIt __host__ __device__ +ForwardIt _CCCL_HOST_DEVICE unique(execution_policy &policy, ForwardIt first, ForwardIt last) @@ -785,7 +785,7 @@ 
unique(execution_policy &policy, template struct zip_adj_not_predicate { template - bool __host__ __device__ operator()(TupleType&& tuple) { + bool _CCCL_HOST_DEVICE operator()(TupleType&& tuple) { return !binary_pred(thrust::get<0>(tuple), thrust::get<1>(tuple)); } @@ -793,12 +793,12 @@ struct zip_adj_not_predicate { }; -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template typename thrust::iterator_traits::difference_type -__host__ __device__ +_CCCL_HOST_DEVICE unique_count(execution_policy &policy, ForwardIt first, ForwardIt last, diff --git a/thrust/system/cuda/detail/unique_by_key.h b/thrust/system/cuda/detail/unique_by_key.h index d5ce8e786..be8ebc865 100644 --- a/thrust/system/cuda/detail/unique_by_key.h +++ b/thrust/system/cuda/detail/unique_by_key.h @@ -53,7 +53,7 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ thrust::pair +_CCCL_HOST_DEVICE thrust::pair unique_by_key( const thrust::detail::execution_policy_base &exec, ForwardIterator1 keys_first, @@ -64,7 +64,7 @@ template -__host__ __device__ thrust::pair +_CCCL_HOST_DEVICE thrust::pair unique_by_key_copy( const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -805,14 +805,14 @@ namespace __unique_by_key { //------------------------- -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE unique_by_key_copy(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -845,7 +845,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE unique_by_key_copy(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -867,7 +867,7 @@ template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE unique_by_key(execution_policy &policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -894,7 +894,7 @@ unique_by_key(execution_policy &policy, template -pair __host__ __device__ +pair _CCCL_HOST_DEVICE unique_by_key(execution_policy &policy, KeyInputIt keys_first, KeyInputIt 
keys_last, diff --git a/thrust/system/cuda/detail/util.h b/thrust/system/cuda/detail/util.h index 4df02332c..0a08c8457 100644 --- a/thrust/system/cuda/detail/util.h +++ b/thrust/system/cuda/detail/util.h @@ -43,7 +43,7 @@ THRUST_NAMESPACE_BEGIN namespace cuda_cub { -inline __host__ __device__ +inline _CCCL_HOST_DEVICE cudaStream_t default_stream() { @@ -56,7 +56,7 @@ default_stream() // Fallback implementation of the customization point. template -__host__ __device__ +_CCCL_HOST_DEVICE cudaStream_t get_stream(execution_policy &) { @@ -65,7 +65,7 @@ get_stream(execution_policy &) // Entry point/interface. template -__host__ __device__ cudaStream_t +_CCCL_HOST_DEVICE cudaStream_t stream(execution_policy &policy) { return get_stream(derived_cast(policy)); @@ -74,7 +74,7 @@ stream(execution_policy &policy) // Fallback implementation of the customization point. template -__host__ __device__ +_CCCL_HOST_DEVICE bool must_perform_optional_stream_synchronization(execution_policy &) { @@ -83,7 +83,7 @@ must_perform_optional_stream_synchronization(execution_policy &) // Entry point/interface. template -__host__ __device__ bool +_CCCL_HOST_DEVICE bool must_perform_optional_synchronization(execution_policy &policy) { return must_perform_optional_stream_synchronization(derived_cast(policy)); @@ -91,9 +91,9 @@ must_perform_optional_synchronization(execution_policy &policy) // Fallback implementation of the customization point. -__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE cudaError_t synchronize_stream(execution_policy &policy) { @@ -102,7 +102,7 @@ synchronize_stream(execution_policy &policy) // Entry point/interface. template -__host__ __device__ +_CCCL_HOST_DEVICE cudaError_t synchronize(Policy &policy) { @@ -110,9 +110,9 @@ synchronize(Policy &policy) } // Fallback implementation of the customization point. 
-__thrust_exec_check_disable__ +_CCCL_EXEC_CHECK_DISABLE template -__host__ __device__ +_CCCL_HOST_DEVICE cudaError_t synchronize_stream_optional(execution_policy &policy) { @@ -132,7 +132,7 @@ synchronize_stream_optional(execution_policy &policy) // Entry point/interface. template -__host__ __device__ +_CCCL_HOST_DEVICE cudaError_t synchronize_optional(Policy &policy) { @@ -178,7 +178,7 @@ trivial_copy_to_device(Type * dst, } template -__host__ __device__ cudaError_t +_CCCL_HOST_DEVICE cudaError_t trivial_copy_device_to_device(Policy & policy, Type * dst, Type const *src, @@ -198,13 +198,13 @@ trivial_copy_device_to_device(Policy & policy, return status; } -inline void __host__ __device__ +inline void _CCCL_HOST_DEVICE terminate() { NV_IF_TARGET(NV_IS_HOST, (std::terminate();), (asm("trap;");)); } -__host__ __device__ +_CCCL_HOST_DEVICE inline void throw_on_error(cudaError_t status) { // Clear the global CUDA error state which may have been set by the last @@ -247,7 +247,7 @@ inline void throw_on_error(cudaError_t status) } } -__host__ __device__ +_CCCL_HOST_DEVICE inline void throw_on_error(cudaError_t status, char const *msg) { // Clear the global CUDA error state which may have been set by the last @@ -308,7 +308,7 @@ struct transform_input_iterator_t InputIt input; mutable UnaryOp op; - __host__ __device__ __forceinline__ + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE transform_input_iterator_t(InputIt input, UnaryOp op) : input(input), op(op) {} @@ -318,7 +318,7 @@ struct transform_input_iterator_t // UnaryOp might not be copy assignable, such as when it is a lambda. Define // an explicit copy assignment operator that doesn't try to assign it. 
- __host__ __device__ + _CCCL_HOST_DEVICE self_t& operator=(const self_t& o) { input = o.input; @@ -326,7 +326,7 @@ struct transform_input_iterator_t } /// Postfix increment - __host__ __device__ __forceinline__ self_t operator++(int) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int) { self_t retval = *this; ++input; @@ -334,71 +334,71 @@ struct transform_input_iterator_t } /// Prefix increment - __host__ __device__ __forceinline__ self_t operator++() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++() { ++input; return *this; } /// Indirection - __host__ __device__ __forceinline__ reference operator*() const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const { typename thrust::iterator_value::type x = *input; return op(x); } /// Indirection - __host__ __device__ __forceinline__ reference operator*() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() { typename thrust::iterator_value::type x = *input; return op(x); } /// Addition - __host__ __device__ __forceinline__ self_t operator+(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const { return self_t(input + n, op); } /// Addition assignment - __host__ __device__ __forceinline__ self_t &operator+=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator+=(difference_type n) { input += n; return *this; } /// Subtraction - __host__ __device__ __forceinline__ self_t operator-(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const { return self_t(input - n, op); } /// Subtraction assignment - __host__ __device__ __forceinline__ self_t &operator-=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator-=(difference_type n) { input -= n; return *this; } /// Distance - __host__ __device__ __forceinline__ difference_type operator-(self_t other) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const { return input 
- other.input; } /// Array subscript - __host__ __device__ __forceinline__ reference operator[](difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const { return op(input[n]); } /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t &rhs) const { return (input == rhs.input); } /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t &rhs) const { return (input != rhs.input); } @@ -421,7 +421,7 @@ struct transform_pair_of_input_iterators_t InputIt2 input2; mutable BinaryOp op; - __host__ __device__ __forceinline__ + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE transform_pair_of_input_iterators_t(InputIt1 input1_, InputIt2 input2_, BinaryOp op_) @@ -433,7 +433,7 @@ struct transform_pair_of_input_iterators_t // BinaryOp might not be copy assignable, such as when it is a lambda. // Define an explicit copy assignment operator that doesn't try to assign it. 
- __host__ __device__ + _CCCL_HOST_DEVICE self_t& operator=(const self_t& o) { input1 = o.input1; @@ -442,7 +442,7 @@ struct transform_pair_of_input_iterators_t } /// Postfix increment - __host__ __device__ __forceinline__ self_t operator++(int) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int) { self_t retval = *this; ++input1; @@ -451,7 +451,7 @@ struct transform_pair_of_input_iterators_t } /// Prefix increment - __host__ __device__ __forceinline__ self_t operator++() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++() { ++input1; ++input2; @@ -459,24 +459,24 @@ struct transform_pair_of_input_iterators_t } /// Indirection - __host__ __device__ __forceinline__ reference operator*() const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const { return op(*input1, *input2); } /// Indirection - __host__ __device__ __forceinline__ reference operator*() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() { return op(*input1, *input2); } /// Addition - __host__ __device__ __forceinline__ self_t operator+(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const { return self_t(input1 + n, input2 + n, op); } /// Addition assignment - __host__ __device__ __forceinline__ self_t &operator+=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator+=(difference_type n) { input1 += n; input2 += n; @@ -484,13 +484,13 @@ struct transform_pair_of_input_iterators_t } /// Subtraction - __host__ __device__ __forceinline__ self_t operator-(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const { return self_t(input1 - n, input2 - n, op); } /// Subtraction assignment - __host__ __device__ __forceinline__ self_t &operator-=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator-=(difference_type n) { input1 -= n; input2 -= n; @@ -498,25 +498,25 @@ struct transform_pair_of_input_iterators_t } /// Distance - __host__ 
__device__ __forceinline__ difference_type operator-(self_t other) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const { return input1 - other.input1; } /// Array subscript - __host__ __device__ __forceinline__ reference operator[](difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const { return op(input1[n], input2[n]); } /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t &rhs) const { return (input1 == rhs.input1) && (input2 == rhs.input2); } /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t &rhs) const { return (input1 != rhs.input1) || (input2 != rhs.input2); } @@ -527,14 +527,14 @@ struct transform_pair_of_input_iterators_t struct identity { template - __host__ __device__ T const & + _CCCL_HOST_DEVICE T const & operator()(T const &t) const { return t; } template - __host__ __device__ T & + _CCCL_HOST_DEVICE T & operator()(T &t) const { return t; @@ -554,11 +554,11 @@ struct counting_iterator_t T count; - __host__ __device__ __forceinline__ + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE counting_iterator_t(T count_) : count(count_) {} /// Postfix increment - __host__ __device__ __forceinline__ self_t operator++(int) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++(int) { self_t retval = *this; ++count; @@ -566,70 +566,70 @@ struct counting_iterator_t } /// Prefix increment - __host__ __device__ __forceinline__ self_t operator++() + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator++() { ++count; return *this; } /// Indirection - __host__ __device__ __forceinline__ reference operator*() const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() const { return count; } /// Indirection - __host__ __device__ __forceinline__ reference operator*() + 
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator*() { return count; } /// Addition - __host__ __device__ __forceinline__ self_t operator+(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator+(difference_type n) const { return self_t(count + n); } /// Addition assignment - __host__ __device__ __forceinline__ self_t &operator+=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator+=(difference_type n) { count += n; return *this; } /// Subtraction - __host__ __device__ __forceinline__ self_t operator-(difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t operator-(difference_type n) const { return self_t(count - n); } /// Subtraction assignment - __host__ __device__ __forceinline__ self_t &operator-=(difference_type n) + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE self_t &operator-=(difference_type n) { count -= n; return *this; } /// Distance - __host__ __device__ __forceinline__ difference_type operator-(self_t other) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE difference_type operator-(self_t other) const { return count - other.count; } /// Array subscript - __host__ __device__ __forceinline__ reference operator[](difference_type n) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE reference operator[](difference_type n) const { return count + n; } /// Equal to - __host__ __device__ __forceinline__ bool operator==(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator==(const self_t &rhs) const { return (count == rhs.count); } /// Not equal to - __host__ __device__ __forceinline__ bool operator!=(const self_t &rhs) const + _CCCL_HOST_DEVICE _CCCL_FORCEINLINE bool operator!=(const self_t &rhs) const { return (count != rhs.count); } diff --git a/thrust/system/cuda/error.h b/thrust/system/cuda/error.h index b180f8347..6ea16aced 100644 --- a/thrust/system/cuda/error.h +++ b/thrust/system/cuda/error.h @@ -24,7 +24,6 @@ #include #include #include -#include THRUST_NAMESPACE_BEGIN diff --git 
a/thrust/system/cuda/future.h b/thrust/system/cuda/future.h index 79bfc9134..a003a1d58 100644 --- a/thrust/system/cuda/future.h +++ b/thrust/system/cuda/future.h @@ -29,7 +29,7 @@ template struct unique_eager_future; template -__host__ +_CCCL_HOST unique_eager_event when_all(Events&&... evs); }} // namespace system::cuda @@ -52,14 +52,14 @@ using thrust::system::cuda::when_all; } // namespace cuda template -__host__ +_CCCL_HOST thrust::cuda::unique_eager_event unique_eager_event_type( thrust::cuda::execution_policy const& ) noexcept; template -__host__ +_CCCL_HOST thrust::cuda::unique_eager_future unique_eager_future_type( thrust::cuda::execution_policy const& diff --git a/thrust/system/cuda/memory.h b/thrust/system/cuda/memory.h index eb8020adb..74390a32b 100644 --- a/thrust/system/cuda/memory.h +++ b/thrust/system/cuda/memory.h @@ -41,7 +41,7 @@ namespace cuda_cub * \see cuda::free * \see std::malloc */ -inline __host__ __device__ pointer malloc(std::size_t n); +inline _CCCL_HOST_DEVICE pointer malloc(std::size_t n); /*! Allocates a typed area of memory available to Thrust's cuda system. * \param n Number of elements to allocate. @@ -54,7 +54,7 @@ inline __host__ __device__ pointer malloc(std::size_t n); * \see std::malloc */ template -inline __host__ __device__ pointer malloc(std::size_t n); +inline _CCCL_HOST_DEVICE pointer malloc(std::size_t n); /*! Deallocates an area of memory previously allocated by cuda::malloc. * \param ptr A cuda::pointer pointing to the beginning of an area @@ -62,7 +62,7 @@ inline __host__ __device__ pointer malloc(std::size_t n); * \see cuda::malloc * \see std::free */ -inline __host__ __device__ void free(pointer ptr); +inline _CCCL_HOST_DEVICE void free(pointer ptr); /*! 
\p cuda::allocator is the default allocator used by the \p cuda system's * containers such as cuda::vector if no user-specified allocator is diff --git a/thrust/system/cuda/memory_resource.h b/thrust/system/cuda/memory_resource.h index 4bf534e40..bf2e3959b 100644 --- a/thrust/system/cuda/memory_resource.h +++ b/thrust/system/cuda/memory_resource.h @@ -23,7 +23,6 @@ #include #include -#include #include #include #include diff --git a/thrust/system/detail/generic/adjacent_difference.h b/thrust/system/detail/generic/adjacent_difference.h index 43592e15b..e91619b32 100644 --- a/thrust/system/detail/generic/adjacent_difference.h +++ b/thrust/system/detail/generic/adjacent_difference.h @@ -34,14 +34,14 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(thrust::execution_policy &exec, InputIterator first, InputIterator last, OutputIterator result); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(thrust::execution_policy &exec, InputIterator first, InputIterator last, OutputIterator result, diff --git a/thrust/system/detail/generic/adjacent_difference.inl b/thrust/system/detail/generic/adjacent_difference.inl index 504129328..0f3a98111 100644 --- a/thrust/system/detail/generic/adjacent_difference.inl +++ b/thrust/system/detail/generic/adjacent_difference.inl @@ -34,7 +34,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(thrust::execution_policy &exec, InputIterator first, InputIterator last, OutputIterator result) @@ -47,7 +47,7 @@ OutputIterator adjacent_difference(thrust::execution_policy &exec template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(thrust::execution_policy &exec, InputIterator first, InputIterator last, OutputIterator result, diff --git a/thrust/system/detail/generic/advance.h b/thrust/system/detail/generic/advance.h index 4d6562e00..2d88b5e8a 100644 --- 
a/thrust/system/detail/generic/advance.h +++ b/thrust/system/detail/generic/advance.h @@ -28,7 +28,7 @@ namespace generic { template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n); } // end namespace generic diff --git a/thrust/system/detail/generic/advance.inl b/thrust/system/detail/generic/advance.inl index 21555ebb0..b639fdfdf 100644 --- a/thrust/system/detail/generic/advance.inl +++ b/thrust/system/detail/generic/advance.inl @@ -30,9 +30,9 @@ namespace generic namespace detail { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n, thrust::incrementable_traversal_tag) { while(n) @@ -42,9 +42,9 @@ void advance(InputIterator& i, Distance n, thrust::incrementable_traversal_tag) } // end while } // end advance() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n, thrust::random_access_traversal_tag) { i += n; @@ -53,7 +53,7 @@ void advance(InputIterator& i, Distance n, thrust::random_access_traversal_tag) } // end detail template -__host__ __device__ +THRUST_HOST_DEVICE void advance(InputIterator& i, Distance n) { // dispatch on iterator traversal diff --git a/thrust/system/detail/generic/binary_search.h b/thrust/system/detail/generic/binary_search.h index 6603f6c30..19368fdba 100644 --- a/thrust/system/detail/generic/binary_search.h +++ b/thrust/system/detail/generic/binary_search.h @@ -34,14 +34,14 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, const T& value); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -50,14 +50,14 @@ ForwardIterator lower_bound(thrust::execution_policy &exec, template -__host__ 
__device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, const T& value); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -66,14 +66,14 @@ ForwardIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, const T& value); template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -82,7 +82,7 @@ bool binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -92,7 +92,7 @@ OutputIterator lower_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -103,7 +103,7 @@ OutputIterator lower_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -113,7 +113,7 @@ OutputIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -124,7 +124,7 @@ OutputIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -134,7 +134,7 @@ OutputIterator binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator 
binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -145,7 +145,7 @@ OutputIterator binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(thrust::execution_policy &exec, ForwardIterator first, @@ -154,7 +154,7 @@ equal_range(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(thrust::execution_policy &exec, ForwardIterator first, diff --git a/thrust/system/detail/generic/binary_search.inl b/thrust/system/detail/generic/binary_search.inl index c85f9a479..5470283f6 100644 --- a/thrust/system/detail/generic/binary_search.inl +++ b/thrust/system/detail/generic/binary_search.inl @@ -55,7 +55,7 @@ namespace detail struct lbf { template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp) { @@ -67,7 +67,7 @@ struct lbf struct ubf { template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp) { @@ -79,7 +79,7 @@ struct ubf struct bsf { template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp) { RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp); @@ -99,12 +99,12 @@ struct binary_search_functor StrictWeakOrdering comp; BinarySearchFunction func; - __host__ __device__ + THRUST_HOST_DEVICE binary_search_functor(ForwardIterator begin, ForwardIterator end, StrictWeakOrdering comp, BinarySearchFunction func) : begin(begin), end(end), comp(comp), func(func) {} template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(Tuple t) { thrust::get<1>(t) = func(begin, end, 
thrust::get<0>(t), comp); @@ -114,7 +114,7 @@ struct binary_search_functor // Vector Implementation template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -136,7 +136,7 @@ OutputIterator binary_search(thrust::execution_policy &exec, // Scalar Implementation template -__host__ __device__ +THRUST_HOST_DEVICE OutputType binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -183,7 +183,7 @@ OutputType binary_search(thrust::execution_policy &exec, struct binary_search_less { template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T1& lhs, const T2& rhs) const { return lhs < rhs; @@ -200,7 +200,7 @@ struct binary_search_less template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -211,7 +211,7 @@ ForwardIterator lower_bound(thrust::execution_policy &exec, } template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -225,7 +225,7 @@ ForwardIterator lower_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -237,7 +237,7 @@ ForwardIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -251,7 +251,7 @@ ForwardIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -262,7 +262,7 @@ bool binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE bool 
binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -279,7 +279,7 @@ bool binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -293,7 +293,7 @@ OutputIterator lower_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator lower_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -307,7 +307,7 @@ OutputIterator lower_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -321,7 +321,7 @@ OutputIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator upper_bound(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -335,7 +335,7 @@ OutputIterator upper_bound(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -349,7 +349,7 @@ OutputIterator binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator binary_search(thrust::execution_policy &exec, ForwardIterator begin, ForwardIterator end, @@ -363,7 +363,7 @@ OutputIterator binary_search(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(thrust::execution_policy &exec, ForwardIterator first, @@ -375,7 +375,7 @@ equal_range(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair equal_range(thrust::execution_policy &exec, ForwardIterator first, diff --git a/thrust/system/detail/generic/copy.h b/thrust/system/detail/generic/copy.h index 36ac71899..751352fad 
100644 --- a/thrust/system/detail/generic/copy.h +++ b/thrust/system/detail/generic/copy.h @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -42,7 +42,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(thrust::execution_policy &exec, InputIterator first, Size n, diff --git a/thrust/system/detail/generic/copy.inl b/thrust/system/detail/generic/copy.inl index 34d66baa6..184b6bf99 100644 --- a/thrust/system/detail/generic/copy.inl +++ b/thrust/system/detail/generic/copy.inl @@ -38,7 +38,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -53,7 +53,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(thrust::execution_policy &exec, InputIterator first, Size n, diff --git a/thrust/system/detail/generic/copy_if.h b/thrust/system/detail/generic/copy_if.h index 6a13edfda..03d8b0f42 100644 --- a/thrust/system/detail/generic/copy_if.h +++ b/thrust/system/detail/generic/copy_if.h @@ -32,7 +32,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -45,7 +45,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/copy_if.inl b/thrust/system/detail/generic/copy_if.inl index 5a6edd72e..89b3d94b9 100644 --- a/thrust/system/detail/generic/copy_if.inl +++ b/thrust/system/detail/generic/copy_if.inl @@ -49,7 +49,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -99,7 +99,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator 
copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -119,7 +119,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/count.h b/thrust/system/detail/generic/count.h index 295d36e6b..21bed5c33 100644 --- a/thrust/system/detail/generic/count.h +++ b/thrust/system/detail/generic/count.h @@ -30,13 +30,13 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value); template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred); diff --git a/thrust/system/detail/generic/count.inl b/thrust/system/detail/generic/count.inl index 6c463c7bd..19369a52c 100644 --- a/thrust/system/detail/generic/count.inl +++ b/thrust/system/detail/generic/count.inl @@ -33,11 +33,11 @@ namespace generic template struct count_if_transform { - __host__ __device__ + THRUST_HOST_DEVICE count_if_transform(Predicate _pred) : pred(_pred){} - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE CountType operator()(const InputType& val) { if(pred(val)) @@ -51,7 +51,7 @@ struct count_if_transform template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value) { @@ -62,7 +62,7 @@ count(thrust::execution_policy &exec, InputIterator first, InputI template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) { 
diff --git a/thrust/system/detail/generic/distance.h b/thrust/system/detail/generic/distance.h index 4627376b5..e275d941b 100644 --- a/thrust/system/detail/generic/distance.h +++ b/thrust/system/detail/generic/distance.h @@ -29,7 +29,7 @@ namespace generic { template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last); diff --git a/thrust/system/detail/generic/distance.inl b/thrust/system/detail/generic/distance.inl index 46bad7ba7..3558c1667 100644 --- a/thrust/system/detail/generic/distance.inl +++ b/thrust/system/detail/generic/distance.inl @@ -31,9 +31,9 @@ namespace detail { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag) { @@ -49,9 +49,9 @@ inline __host__ __device__ } // end advance() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last, thrust::random_access_traversal_tag) { @@ -61,9 +61,9 @@ inline __host__ __device__ } // end detail -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type distance(InputIterator first, InputIterator last) { diff --git a/thrust/system/detail/generic/equal.h b/thrust/system/detail/generic/equal.h index 4afd88d00..81881459b 100644 --- a/thrust/system/detail/generic/equal.h +++ b/thrust/system/detail/generic/equal.h @@ -29,12 +29,12 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); template -__host__ __device__ 
+THRUST_HOST_DEVICE bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); diff --git a/thrust/system/detail/generic/equal.inl b/thrust/system/detail/generic/equal.inl index c023070cd..b6c0f991b 100644 --- a/thrust/system/detail/generic/equal.inl +++ b/thrust/system/detail/generic/equal.inl @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) { typedef typename thrust::iterator_traits::value_type InputType1; @@ -42,10 +42,10 @@ bool equal(thrust::execution_policy &exec, InputIterator1 first1, // the == below could be a __host__ function in the case of std::vector::iterator::operator== -// we make this exception for equal and use __thrust_exec_check_disable__ because it is used in vector's implementation -__thrust_exec_check_disable__ +// we make this exception for equal and use nv_exec_check_disable because it is used in vector's implementation +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) { return thrust::mismatch(exec, first1, last1, first2, binary_pred).first == last1; diff --git a/thrust/system/detail/generic/extrema.h b/thrust/system/detail/generic/extrema.h index e3b447958..1eebda87d 100644 --- a/thrust/system/detail/generic/extrema.h +++ b/thrust/system/detail/generic/extrema.h @@ -35,14 +35,14 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -50,14 +50,14 @@ ForwardIterator 
max_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -65,14 +65,14 @@ ForwardIterator min_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/extrema.inl b/thrust/system/detail/generic/extrema.inl index 744d137de..d088d4bff 100644 --- a/thrust/system/detail/generic/extrema.inl +++ b/thrust/system/detail/generic/extrema.inl @@ -56,10 +56,10 @@ struct min_element_reduction { BinaryPredicate comp; - __host__ __device__ + THRUST_HOST_DEVICE min_element_reduction(BinaryPredicate comp) : comp(comp){} - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(const thrust::tuple& lhs, const thrust::tuple& rhs ) @@ -83,10 +83,10 @@ struct max_element_reduction { BinaryPredicate comp; - __host__ __device__ + THRUST_HOST_DEVICE max_element_reduction(BinaryPredicate comp) : comp(comp){} - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(const thrust::tuple& lhs, const thrust::tuple& rhs ) @@ -112,10 +112,10 @@ struct minmax_element_reduction { BinaryPredicate comp; - __host__ __device__ + THRUST_HOST_DEVICE minmax_element_reduction(BinaryPredicate comp) : comp(comp){} - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple< thrust::tuple, thrust::tuple > operator()(const thrust::tuple< thrust::tuple, thrust::tuple >& lhs, const thrust::tuple< thrust::tuple, thrust::tuple >& rhs ) @@ -130,7 +130,7 @@ struct 
minmax_element_reduction template struct duplicate_tuple { - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple< thrust::tuple, thrust::tuple > operator()(const thrust::tuple& t) { @@ -143,7 +143,7 @@ struct duplicate_tuple template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -155,7 +155,7 @@ ForwardIterator min_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -180,7 +180,7 @@ ForwardIterator min_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -192,7 +192,7 @@ ForwardIterator max_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -217,7 +217,7 @@ ForwardIterator max_element(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -229,7 +229,7 @@ thrust::pair minmax_element(thrust::execution_p template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/fill.h b/thrust/system/detail/generic/fill.h index 5a881359b..42065806e 100644 --- a/thrust/system/detail/generic/fill.h +++ b/thrust/system/detail/generic/fill.h @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator fill_n(thrust::execution_policy &exec, OutputIterator first, Size n, @@ -43,7 +43,7 @@ __host__ __device__ } template -__host__ __device__ 
+THRUST_HOST_DEVICE void fill(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/find.h b/thrust/system/detail/generic/find.h index 6db441d02..8498a1c20 100644 --- a/thrust/system/detail/generic/find.h +++ b/thrust/system/detail/generic/find.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -38,7 +38,7 @@ InputIterator find(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -46,7 +46,7 @@ InputIterator find_if(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if_not(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/find.inl b/thrust/system/detail/generic/find.inl index a391e5e83..bceeab3a1 100644 --- a/thrust/system/detail/generic/find.inl +++ b/thrust/system/detail/generic/find.inl @@ -40,7 +40,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -55,7 +55,7 @@ InputIterator find(thrust::execution_policy &exec, template struct find_if_functor { - __host__ __device__ + THRUST_HOST_DEVICE TupleType operator()(const TupleType& lhs, const TupleType& rhs) const { // select the smallest index among true results @@ -76,7 +76,7 @@ struct find_if_functor template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -134,7 +134,7 @@ InputIterator find_if(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if_not(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff 
--git a/thrust/system/detail/generic/for_each.h b/thrust/system/detail/generic/for_each.h index 0c6810307..41bc8da77 100644 --- a/thrust/system/detail/generic/for_each.h +++ b/thrust/system/detail/generic/for_each.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(thrust::execution_policy &, InputIterator first, InputIterator , @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(thrust::execution_policy &, InputIterator first, Size , diff --git a/thrust/system/detail/generic/gather.h b/thrust/system/detail/generic/gather.h index 5b6b41831..d627795e2 100644 --- a/thrust/system/detail/generic/gather.h +++ b/thrust/system/detail/generic/gather.h @@ -33,7 +33,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather(thrust::execution_policy &exec, InputIterator map_first, InputIterator map_last, @@ -46,7 +46,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(thrust::execution_policy &exec, InputIterator1 map_first, InputIterator1 map_last, @@ -61,7 +61,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(thrust::execution_policy &exec, InputIterator1 map_first, InputIterator1 map_last, diff --git a/thrust/system/detail/generic/gather.inl b/thrust/system/detail/generic/gather.inl index 7ab550edf..5ba90ce91 100644 --- a/thrust/system/detail/generic/gather.inl +++ b/thrust/system/detail/generic/gather.inl @@ -36,7 +36,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather(thrust::execution_policy &exec, InputIterator 
map_first, InputIterator map_last, @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(thrust::execution_policy &exec, InputIterator1 map_first, InputIterator1 map_last, @@ -81,7 +81,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator gather_if(thrust::execution_policy &exec, InputIterator1 map_first, InputIterator1 map_last, diff --git a/thrust/system/detail/generic/generate.h b/thrust/system/detail/generic/generate.h index a9846c5be..cac99d67b 100644 --- a/thrust/system/detail/generic/generate.h +++ b/thrust/system/detail/generic/generate.h @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void generate(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -41,7 +41,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator generate_n(thrust::execution_policy &exec, OutputIterator first, Size n, diff --git a/thrust/system/detail/generic/generate.inl b/thrust/system/detail/generic/generate.inl index 869e0f32b..218e5c319 100644 --- a/thrust/system/detail/generic/generate.inl +++ b/thrust/system/detail/generic/generate.inl @@ -33,7 +33,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void generate(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -65,7 +65,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator generate_n(thrust::execution_policy &exec, OutputIterator first, Size n, diff --git a/thrust/system/detail/generic/inner_product.h b/thrust/system/detail/generic/inner_product.h index 62d10d31f..f8d2a00c0 100644 --- a/thrust/system/detail/generic/inner_product.h +++ b/thrust/system/detail/generic/inner_product.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -39,7 +39,7 @@ __host__ __device__ template -__host__ __device__ 
+THRUST_HOST_DEVICE OutputType inner_product(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/inner_product.inl b/thrust/system/detail/generic/inner_product.inl index 5055ec10f..febf711e4 100644 --- a/thrust/system/detail/generic/inner_product.inl +++ b/thrust/system/detail/generic/inner_product.inl @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -46,7 +46,7 @@ OutputType inner_product(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputType inner_product(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/logical.h b/thrust/system/detail/generic/logical.h index e261154e2..5cf3e1601 100644 --- a/thrust/system/detail/generic/logical.h +++ b/thrust/system/detail/generic/logical.h @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE bool all_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) { return thrust::find_if(exec, first, last, thrust::detail::not1(pred)) == last; @@ -40,7 +40,7 @@ bool all_of(thrust::execution_policy &exec, InputIterator first template -__host__ __device__ +THRUST_HOST_DEVICE bool any_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) { return thrust::find_if(exec, first, last, pred) != last; @@ -48,7 +48,7 @@ bool any_of(thrust::execution_policy &exec, InputIterator first template -__host__ __device__ +THRUST_HOST_DEVICE bool none_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) { return !thrust::any_of(exec, first, last, pred); diff --git a/thrust/system/detail/generic/memory.h b/thrust/system/detail/generic/memory.h index 675cc7302..159c21624 100644 --- 
a/thrust/system/detail/generic/memory.h +++ b/thrust/system/detail/generic/memory.h @@ -38,27 +38,27 @@ namespace generic { template -__host__ __device__ +THRUST_HOST_DEVICE void malloc(thrust::execution_policy &, Size); template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pointer malloc(thrust::execution_policy &s, std::size_t n); template -__host__ __device__ +THRUST_HOST_DEVICE void free(thrust::execution_policy &, Pointer); template -__host__ __device__ +THRUST_HOST_DEVICE void assign_value(tag, Pointer1, Pointer2); template -__host__ __device__ +THRUST_HOST_DEVICE void get_value(thrust::execution_policy &, Pointer); template -__host__ __device__ +THRUST_HOST_DEVICE void iter_swap(thrust::execution_policy&, Pointer1, Pointer2); } // end generic diff --git a/thrust/system/detail/generic/memory.inl b/thrust/system/detail/generic/memory.inl index b85729098..29c89de02 100644 --- a/thrust/system/detail/generic/memory.inl +++ b/thrust/system/detail/generic/memory.inl @@ -33,7 +33,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void malloc(thrust::execution_policy &, Size) { THRUST_STATIC_ASSERT_MSG( @@ -44,7 +44,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pointer malloc(thrust::execution_policy &exec, std::size_t n) { @@ -55,7 +55,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void free(thrust::execution_policy &, Pointer) { THRUST_STATIC_ASSERT_MSG( @@ -66,7 +66,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void assign_value(thrust::execution_policy &, Pointer1, Pointer2) { THRUST_STATIC_ASSERT_MSG( @@ -77,7 +77,7 @@ void assign_value(thrust::execution_policy &, Pointer1, Pointer2) template -__host__ __device__ +THRUST_HOST_DEVICE void get_value(thrust::execution_policy &, Pointer) { THRUST_STATIC_ASSERT_MSG( @@ -88,7 +88,7 @@ void get_value(thrust::execution_policy &, Pointer) template -__host__ __device__ +THRUST_HOST_DEVICE void 
iter_swap(thrust::execution_policy &, Pointer1, Pointer2) { THRUST_STATIC_ASSERT_MSG( diff --git a/thrust/system/detail/generic/merge.h b/thrust/system/detail/generic/merge.h index 6e8246407..62346be43 100644 --- a/thrust/system/detail/generic/merge.h +++ b/thrust/system/detail/generic/merge.h @@ -35,7 +35,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -49,7 +49,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -59,7 +59,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, @@ -71,7 +71,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, diff --git a/thrust/system/detail/generic/merge.inl b/thrust/system/detail/generic/merge.inl index 03b77e623..bc14dff98 100644 --- a/thrust/system/detail/generic/merge.inl +++ b/thrust/system/detail/generic/merge.inl @@ -39,7 +39,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(thrust::execution_policy &, InputIterator1, InputIterator1, @@ -60,7 +60,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -74,7 +74,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, @@ -109,7 +109,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, InputIterator1 keys_last1, diff --git 
a/thrust/system/detail/generic/mismatch.h b/thrust/system/detail/generic/mismatch.h index 4a71cd344..2e6e79892 100644 --- a/thrust/system/detail/generic/mismatch.h +++ b/thrust/system/detail/generic/mismatch.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(thrust::execution_policy &exec, InputIterator1 first1, @@ -39,7 +39,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(thrust::execution_policy &exec, InputIterator1 first1, diff --git a/thrust/system/detail/generic/mismatch.inl b/thrust/system/detail/generic/mismatch.inl index 1ca5c39d2..a2fc75e10 100644 --- a/thrust/system/detail/generic/mismatch.inl +++ b/thrust/system/detail/generic/mismatch.inl @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(thrust::execution_policy &exec, InputIterator1 first1, @@ -46,7 +46,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair mismatch(thrust::execution_policy &exec, InputIterator1 first1, diff --git a/thrust/system/detail/generic/partition.h b/thrust/system/detail/generic/partition.h index 113d6ecbc..b5f769934 100644 --- a/thrust/system/detail/generic/partition.h +++ b/thrust/system/detail/generic/partition.h @@ -36,7 +36,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -46,7 +46,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -59,7 +59,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(thrust::execution_policy &exec, InputIterator first, @@ -75,7 +75,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(thrust::execution_policy &exec, InputIterator1 
first, @@ -89,7 +89,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -100,7 +100,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -113,7 +113,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(thrust::execution_policy &exec, InputIterator first, @@ -129,7 +129,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(thrust::execution_policy &exec, InputIterator1 first, @@ -143,7 +143,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -153,7 +153,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/partition.inl b/thrust/system/detail/generic/partition.inl index ab56fdd57..e5524569f 100644 --- a/thrust/system/detail/generic/partition.inl +++ b/thrust/system/detail/generic/partition.inl @@ -43,7 +43,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -69,7 +69,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -99,7 +99,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(thrust::execution_policy &exec, InputIterator first, @@ -126,7 +126,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(thrust::execution_policy &exec, InputIterator1 first, @@ -151,7 +151,7 @@ 
__host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -165,7 +165,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -181,7 +181,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(thrust::execution_policy &exec, InputIterator first, @@ -200,7 +200,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair partition_copy(thrust::execution_policy &exec, InputIterator1 first, @@ -217,7 +217,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -230,7 +230,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/per_device_resource.h b/thrust/system/detail/generic/per_device_resource.h index 606f91f36..b07af22ca 100644 --- a/thrust/system/detail/generic/per_device_resource.h +++ b/thrust/system/detail/generic/per_device_resource.h @@ -32,7 +32,7 @@ namespace generic template -__host__ +THRUST_HOST MR * get_per_device_resource(thrust::detail::execution_policy_base&) { return mr::get_global_resource(); diff --git a/thrust/system/detail/generic/reduce.h b/thrust/system/detail/generic/reduce.h index f28b11a87..b1bb84830 100644 --- a/thrust/system/detail/generic/reduce.h +++ b/thrust/system/detail/generic/reduce.h @@ -31,13 +31,13 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::value_type reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last); template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(thrust::execution_policy &exec, 
InputIterator first, InputIterator last, T init); @@ -45,7 +45,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init, BinaryFunction binary_op); diff --git a/thrust/system/detail/generic/reduce.inl b/thrust/system/detail/generic/reduce.inl index d673d0cf8..5a7fa8218 100644 --- a/thrust/system/detail/generic/reduce.inl +++ b/thrust/system/detail/generic/reduce.inl @@ -34,7 +34,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::value_type reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last) { @@ -46,7 +46,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init) { // use plus by default @@ -58,7 +58,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType reduce(thrust::execution_policy &, RandomAccessIterator, RandomAccessIterator, diff --git a/thrust/system/detail/generic/reduce_by_key.h b/thrust/system/detail/generic/reduce_by_key.h index 8ba47e11f..69875c413 100644 --- a/thrust/system/detail/generic/reduce_by_key.h +++ b/thrust/system/detail/generic/reduce_by_key.h @@ -35,7 +35,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -50,7 +50,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -67,7 +67,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, diff --git a/thrust/system/detail/generic/reduce_by_key.inl b/thrust/system/detail/generic/reduce_by_key.inl index 880cc7526..dfd3653e2 100644 --- a/thrust/system/detail/generic/reduce_by_key.inl +++ b/thrust/system/detail/generic/reduce_by_key.inl @@ -50,10 +50,10 
@@ struct reduce_by_key_functor typedef typename thrust::tuple result_type; - __host__ __device__ + THRUST_HOST_DEVICE reduce_by_key_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(result_type a, result_type b) { return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), @@ -72,7 +72,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -137,7 +137,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -159,7 +159,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(thrust::execution_policy &exec, InputIterator1 keys_first, diff --git a/thrust/system/detail/generic/remove.h b/thrust/system/detail/generic/remove.h index 37354ef80..42d96158f 100644 --- a/thrust/system/detail/generic/remove.h +++ b/thrust/system/detail/generic/remove.h @@ -36,7 +36,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -47,7 +47,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -58,7 +58,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -69,7 +69,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -81,7 +81,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -94,7 +94,7 @@ template 
-__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/remove.inl b/thrust/system/detail/generic/remove.inl index e51a3caee..bef4e4984 100644 --- a/thrust/system/detail/generic/remove.inl +++ b/thrust/system/detail/generic/remove.inl @@ -36,7 +36,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -53,7 +53,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -70,7 +70,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -90,7 +90,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -111,7 +111,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -127,7 +127,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/replace.h b/thrust/system/detail/generic/replace.h index 0821d6c07..ae34f611b 100644 --- a/thrust/system/detail/generic/replace.h +++ b/thrust/system/detail/generic/replace.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -40,7 +40,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(thrust::execution_policy &exec, 
InputIterator1 first, InputIterator1 last, @@ -51,7 +51,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -61,7 +61,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -70,7 +70,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -80,7 +80,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/replace.inl b/thrust/system/detail/generic/replace.inl index b0a508bf6..16e4c6431 100644 --- a/thrust/system/detail/generic/replace.inl +++ b/thrust/system/detail/generic/replace.inl @@ -38,11 +38,11 @@ namespace detail template struct new_value_if { - __host__ __device__ + THRUST_HOST_DEVICE new_value_if(Predicate p, NewType nv):pred(p),new_value(nv){} template - __host__ __device__ + THRUST_HOST_DEVICE OutputType operator()(const InputType &x) const { return pred(x) ? new_value : x; @@ -51,7 +51,7 @@ template // this version of operator()() works like the previous but // feeds its second argument to pred template - __host__ __device__ + THRUST_HOST_DEVICE OutputType operator()(const InputType &x, const PredicateArgumentType &y) { return pred(y) ? 
new_value : x; @@ -66,11 +66,11 @@ template template struct constant_unary { - __host__ __device__ + THRUST_HOST_DEVICE constant_unary(T _c):c(_c){} template - __host__ __device__ + THRUST_HOST_DEVICE T operator()(U &) { return c; @@ -84,7 +84,7 @@ template template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -100,7 +100,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -117,7 +117,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -132,7 +132,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -145,7 +145,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -159,7 +159,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void replace(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/reverse.h b/thrust/system/detail/generic/reverse.h index 65c77ae75..d3d6b1963 100644 --- a/thrust/system/detail/generic/reverse.h +++ b/thrust/system/detail/generic/reverse.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void reverse(thrust::execution_policy &exec, BidirectionalIterator first, BidirectionalIterator last); @@ -39,7 +39,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator reverse_copy(thrust::execution_policy &exec, BidirectionalIterator first, BidirectionalIterator last, diff --git 
a/thrust/system/detail/generic/reverse.inl b/thrust/system/detail/generic/reverse.inl index 1ce6db38b..a3cb9cc47 100644 --- a/thrust/system/detail/generic/reverse.inl +++ b/thrust/system/detail/generic/reverse.inl @@ -35,7 +35,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void reverse(thrust::execution_policy &exec, BidirectionalIterator first, BidirectionalIterator last) @@ -55,7 +55,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator reverse_copy(thrust::execution_policy &exec, BidirectionalIterator first, BidirectionalIterator last, diff --git a/thrust/system/detail/generic/scalar/binary_search.h b/thrust/system/detail/generic/scalar/binary_search.h index 3e019c223..c3d017ab9 100644 --- a/thrust/system/detail/generic/scalar/binary_search.h +++ b/thrust/system/detail/generic/scalar/binary_search.h @@ -34,40 +34,40 @@ namespace scalar { template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator lower_bound_n(RandomAccessIterator first, Size n, const T &val, BinaryPredicate comp); template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, const T &val, BinaryPredicate comp); template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator upper_bound_n(RandomAccessIterator first, Size n, const T &val, BinaryPredicate comp); template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, const T &val, BinaryPredicate comp); template -__host__ __device__ +THRUST_HOST_DEVICE pair equal_range(RandomAccessIterator first, RandomAccessIterator last, const T &val, BinaryPredicate comp); template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp); } // end scalar diff --git a/thrust/system/detail/generic/scalar/binary_search.inl 
b/thrust/system/detail/generic/scalar/binary_search.inl index 61c71fba4..324edebcc 100644 --- a/thrust/system/detail/generic/scalar/binary_search.inl +++ b/thrust/system/detail/generic/scalar/binary_search.inl @@ -36,7 +36,7 @@ namespace scalar { template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator lower_bound_n(RandomAccessIterator first, Size n, const T &val, @@ -68,7 +68,7 @@ RandomAccessIterator lower_bound_n(RandomAccessIterator first, // XXX generalize these upon implementation of scalar::distance & scalar::advance template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, const T &val, BinaryPredicate comp) @@ -78,7 +78,7 @@ RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterato } template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator upper_bound_n(RandomAccessIterator first, Size n, const T &val, @@ -108,7 +108,7 @@ RandomAccessIterator upper_bound_n(RandomAccessIterator first, } template -__host__ __device__ +THRUST_HOST_DEVICE RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, const T &val, BinaryPredicate comp) @@ -118,7 +118,7 @@ RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterato } template -__host__ __device__ +THRUST_HOST_DEVICE pair equal_range(RandomAccessIterator first, RandomAccessIterator last, const T &val, @@ -130,7 +130,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp) { RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(first, last, value, comp); diff --git a/thrust/system/detail/generic/scan.h b/thrust/system/detail/generic/scan.h index 476441ab6..1fccbf99a 100644 --- a/thrust/system/detail/generic/scan.h +++ b/thrust/system/detail/generic/scan.h @@ -32,7 +32,7 @@ namespace generic 
template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -44,7 +44,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -55,7 +55,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -66,7 +66,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -80,7 +80,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/scan.inl b/thrust/system/detail/generic/scan.inl index 99e7fa07b..d1984c4b9 100644 --- a/thrust/system/detail/generic/scan.inl +++ b/thrust/system/detail/generic/scan.inl @@ -38,7 +38,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -52,7 +52,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -68,7 +68,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -84,7 +84,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(thrust::execution_policy &, InputIterator, InputIterator, @@ -104,7 +104,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(thrust::execution_policy &, InputIterator, InputIterator, diff --git a/thrust/system/detail/generic/scan_by_key.h 
b/thrust/system/detail/generic/scan_by_key.h index 9e38ac933..ddc10f0cb 100644 --- a/thrust/system/detail/generic/scan_by_key.h +++ b/thrust/system/detail/generic/scan_by_key.h @@ -37,7 +37,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -50,7 +50,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -65,7 +65,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -79,7 +79,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -92,7 +92,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -107,7 +107,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -124,7 +124,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/scan_by_key.inl b/thrust/system/detail/generic/scan_by_key.inl index 28fd3474c..bc493bced 100644 --- a/thrust/system/detail/generic/scan_by_key.inl +++ b/thrust/system/detail/generic/scan_by_key.inl @@ -46,10 +46,10 @@ struct segmented_scan_functor typedef typename thrust::tuple result_type; - __host__ __device__ + THRUST_HOST_DEVICE segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} - __host__ __device__ + THRUST_HOST_DEVICE result_type operator()(result_type a, 
result_type b) { return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), @@ -65,7 +65,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -81,7 +81,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -99,7 +99,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -139,7 +139,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -156,7 +156,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -174,7 +174,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -194,7 +194,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/scatter.h b/thrust/system/detail/generic/scatter.h index 6bb7949ef..3bba2caf7 100644 --- a/thrust/system/detail/generic/scatter.h +++ b/thrust/system/detail/generic/scatter.h @@ -33,7 +33,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -46,7 +46,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -61,7 +61,7 @@ template -__host__ __device__ 
+THRUST_HOST_DEVICE void scatter_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/scatter.inl b/thrust/system/detail/generic/scatter.inl index 5b4798708..35f7598fd 100644 --- a/thrust/system/detail/generic/scatter.inl +++ b/thrust/system/detail/generic/scatter.inl @@ -36,7 +36,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -76,7 +76,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/generic/select_system.h b/thrust/system/detail/generic/select_system.h index 7619b80e5..eb5b999c9 100644 --- a/thrust/system/detail/generic/select_system.h +++ b/thrust/system/detail/generic/select_system.h @@ -51,7 +51,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::disable_if< select_system1_exists::value, System & @@ -59,7 +59,7 @@ __host__ __device__ select_system(thrust::execution_policy &system); template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if_defined< thrust::detail::minimum_system >::type @@ -67,7 +67,7 @@ __host__ __device__ thrust::execution_policy &system2); template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system3_exists::value, thrust::detail::minimum_system @@ -77,7 +77,7 @@ __host__ __device__ thrust::execution_policy &system3); template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system4_exists::value, thrust::detail::minimum_system @@ -88,7 +88,7 @@ __host__ __device__ thrust::execution_policy &system4); template -__host__ __device__ +THRUST_HOST_DEVICE 
typename thrust::detail::lazy_disable_if< select_system5_exists::value, thrust::detail::minimum_system @@ -100,7 +100,7 @@ __host__ __device__ thrust::execution_policy &system5); template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system6_exists::value, thrust::detail::minimum_system @@ -113,7 +113,7 @@ __host__ __device__ thrust::execution_policy &system6); // Map a single any_system_tag to device_system_tag. -inline __host__ __device__ +inline THRUST_HOST_DEVICE thrust::device_system_tag select_system(thrust::any_system_tag); } // end generic diff --git a/thrust/system/detail/generic/select_system.inl b/thrust/system/detail/generic/select_system.inl index b69d17c45..f35c26ef8 100644 --- a/thrust/system/detail/generic/select_system.inl +++ b/thrust/system/detail/generic/select_system.inl @@ -34,7 +34,7 @@ namespace select_system_detail // min_system case 1: both systems have the same type, just return the first one template -__host__ __device__ +THRUST_HOST_DEVICE System &min_system(thrust::execution_policy &system1, thrust::execution_policy &) { @@ -44,7 +44,7 @@ System &min_system(thrust::execution_policy &system1, // min_system case 2: systems have differing type and the first type is considered the minimum template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if< thrust::detail::is_same< System1, @@ -60,7 +60,7 @@ typename thrust::detail::enable_if< // min_system case 3: systems have differing type and the second type is considered the minimum template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if< thrust::detail::is_same< System2, @@ -78,7 +78,7 @@ typename thrust::detail::enable_if< template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::disable_if< select_system1_exists::value, System & @@ -90,7 +90,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::enable_if_defined< 
thrust::detail::minimum_system >::type @@ -102,7 +102,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system3_exists::value, thrust::detail::minimum_system @@ -116,7 +116,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system4_exists::value, thrust::detail::minimum_system @@ -131,7 +131,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system5_exists::value, thrust::detail::minimum_system @@ -147,7 +147,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::detail::lazy_disable_if< select_system6_exists::value, thrust::detail::minimum_system @@ -164,7 +164,7 @@ __host__ __device__ // map a single any_system_tag to device_system_tag -inline __host__ __device__ +inline THRUST_HOST_DEVICE thrust::device_system_tag select_system(thrust::any_system_tag) { return thrust::device_system_tag(); diff --git a/thrust/system/detail/generic/sequence.h b/thrust/system/detail/generic/sequence.h index 26bf17bb8..24d85d4fc 100644 --- a/thrust/system/detail/generic/sequence.h +++ b/thrust/system/detail/generic/sequence.h @@ -31,14 +31,14 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -46,7 +46,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/sequence.inl b/thrust/system/detail/generic/sequence.inl index 7081f38ce..2a848c8bf 100644 --- a/thrust/system/detail/generic/sequence.inl +++ 
b/thrust/system/detail/generic/sequence.inl @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -43,7 +43,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -60,8 +60,8 @@ struct compute_sequence_value T init; T step; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE T operator()(std::size_t i) const { return init + step * i; @@ -73,8 +73,8 @@ struct compute_sequence_value:: T init; T step; - __thrust_exec_check_disable__ - __host__ __device__ + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE T operator()(std::size_t i) const { return init + step * static_cast(i); @@ -83,7 +83,7 @@ struct compute_sequence_value:: } template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/set_operations.h b/thrust/system/detail/generic/set_operations.h index 37665d78d..1400db99a 100644 --- a/thrust/system/detail/generic/set_operations.h +++ b/thrust/system/detail/generic/set_operations.h @@ -33,7 +33,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -48,7 +48,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -65,7 +65,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -86,7 +86,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -104,7 +104,7 
@@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -119,7 +119,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -135,7 +135,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, @@ -154,7 +154,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, @@ -171,7 +171,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -186,7 +186,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -203,7 +203,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, @@ -224,7 +224,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, @@ -242,7 +242,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -257,7 +257,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(thrust::execution_policy &system, InputIterator1 first1, InputIterator1 last1, @@ -274,7 +274,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, @@ -295,7 +295,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE 
thrust::pair set_union_by_key(thrust::execution_policy &system, InputIterator1 keys_first1, diff --git a/thrust/system/detail/generic/set_operations.inl b/thrust/system/detail/generic/set_operations.inl index 4363be5c0..ba0e50c00 100644 --- a/thrust/system/detail/generic/set_operations.inl +++ b/thrust/system/detail/generic/set_operations.inl @@ -38,7 +38,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -58,7 +58,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -83,7 +83,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -124,7 +124,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -143,7 +143,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -166,7 +166,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_intersection_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -213,7 +213,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -233,7 +233,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -258,7 +258,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_symmetric_difference_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -299,7 +299,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator 
set_union(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -319,7 +319,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -344,7 +344,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair set_union_by_key(thrust::execution_policy &exec, InputIterator1 keys_first1, @@ -386,7 +386,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(thrust::execution_policy &, InputIterator1, InputIterator1, @@ -408,7 +408,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_intersection(thrust::execution_policy &, InputIterator1, InputIterator1, @@ -430,7 +430,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(thrust::execution_policy &, InputIterator1, InputIterator1, @@ -452,7 +452,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(thrust::execution_policy &, InputIterator1, InputIterator1, diff --git a/thrust/system/detail/generic/shuffle.h b/thrust/system/detail/generic/shuffle.h index 8f8e21afd..22d4d6971 100644 --- a/thrust/system/detail/generic/shuffle.h +++ b/thrust/system/detail/generic/shuffle.h @@ -34,13 +34,13 @@ namespace detail { namespace generic { template -__host__ __device__ void shuffle( +THRUST_HOST_DEVICE void shuffle( thrust::execution_policy& exec, RandomIterator first, RandomIterator last, URBG&& g); template -__host__ __device__ void shuffle_copy( +THRUST_HOST_DEVICE void shuffle_copy( thrust::execution_policy& exec, RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g); diff --git a/thrust/system/detail/generic/shuffle.inl b/thrust/system/detail/generic/shuffle.inl index 0deb1f631..4eb491214 100644 --- a/thrust/system/detail/generic/shuffle.inl +++ b/thrust/system/detail/generic/shuffle.inl @@ -39,7 +39,7 @@ class feistel_bijection { public: template - __host__ __device__ 
feistel_bijection(std::uint64_t m, URBG&& g) { + THRUST_HOST_DEVICE feistel_bijection(std::uint64_t m, URBG&& g) { std::uint64_t total_bits = get_cipher_bits(m); // Half bits rounded down left_side_bits = total_bits / 2; @@ -53,11 +53,11 @@ class feistel_bijection { } } - __host__ __device__ std::uint64_t nearest_power_of_two() const { + THRUST_HOST_DEVICE std::uint64_t nearest_power_of_two() const { return 1ull << (left_side_bits + right_side_bits); } - __host__ __device__ std::uint64_t operator()(const std::uint64_t val) const { + THRUST_HOST_DEVICE std::uint64_t operator()(const std::uint64_t val) const { std::uint32_t state[2] = { static_cast( val >> right_side_bits ), static_cast( val & right_side_mask ) }; for( std::uint32_t i = 0; i < num_rounds; i++ ) { @@ -74,7 +74,7 @@ class feistel_bijection { private: // Perform 64 bit multiplication and save result in two 32 bit int - static __host__ __device__ void mulhilo( std::uint64_t a, std::uint64_t b, std::uint32_t& hi, std::uint32_t& lo ) + static THRUST_HOST_DEVICE void mulhilo( std::uint64_t a, std::uint64_t b, std::uint32_t& hi, std::uint32_t& lo ) { std::uint64_t product = a * b; hi = static_cast( product >> 32 ); @@ -82,7 +82,7 @@ class feistel_bijection { } // Find the nearest power of two - static __host__ __device__ std::uint64_t get_cipher_bits(std::uint64_t m) { + static THRUST_HOST_DEVICE std::uint64_t get_cipher_bits(std::uint64_t m) { if (m <= 16) return 4; std::uint64_t i = 0; m--; @@ -108,7 +108,7 @@ struct key_flag_tuple { // scan only flags struct key_flag_scan_op { - __host__ __device__ key_flag_tuple operator()(const key_flag_tuple& a, + THRUST_HOST_DEVICE key_flag_tuple operator()(const key_flag_tuple& a, const key_flag_tuple& b) { return {b.key, a.flag + b.flag}; } @@ -117,10 +117,10 @@ struct key_flag_scan_op { struct construct_key_flag_op { std::uint64_t m; feistel_bijection bijection; - __host__ __device__ construct_key_flag_op(std::uint64_t m, + THRUST_HOST_DEVICE 
construct_key_flag_op(std::uint64_t m, feistel_bijection bijection) : m(m), bijection(bijection) {} - __host__ __device__ key_flag_tuple operator()(std::uint64_t idx) { + THRUST_HOST_DEVICE key_flag_tuple operator()(std::uint64_t idx) { auto gather_key = bijection(idx); return key_flag_tuple{gather_key, (gather_key < m) ? 1ull : 0ull}; } @@ -133,8 +133,8 @@ struct write_output_op { OutputIterT out; // flag contains inclusive scan of valid keys // perform gather using valid keys - __thrust_exec_check_disable__ - __host__ __device__ std::size_t operator()(key_flag_tuple x) { + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE std::size_t operator()(key_flag_tuple x) { if (x.key < m) { // -1 because inclusive scan out[x.flag - 1] = in[x.key]; @@ -144,7 +144,7 @@ struct write_output_op { }; template -__host__ __device__ void shuffle( +THRUST_HOST_DEVICE void shuffle( thrust::execution_policy& exec, RandomIterator first, RandomIterator last, URBG&& g) { using InputType = typename thrust::iterator_value_t; @@ -157,7 +157,7 @@ __host__ __device__ void shuffle( template -__host__ __device__ void shuffle_copy( +THRUST_HOST_DEVICE void shuffle_copy( thrust::execution_policy& exec, RandomIterator first, RandomIterator last, OutputIterator result, URBG&& g) { // m is the length of the input diff --git a/thrust/system/detail/generic/sort.h b/thrust/system/detail/generic/sort.h index cd8d45562..9206f0707 100644 --- a/thrust/system/detail/generic/sort.h +++ b/thrust/system/detail/generic/sort.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last); @@ -39,7 +39,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -49,7 +49,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void 
sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -60,7 +60,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -70,7 +70,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last); @@ -80,7 +80,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -90,7 +90,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -102,7 +102,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -111,7 +111,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); @@ -120,7 +120,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -128,7 +128,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); @@ -137,7 +137,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/sort.inl b/thrust/system/detail/generic/sort.inl index 632cab435..8fa43862c 100644 --- 
a/thrust/system/detail/generic/sort.inl +++ b/thrust/system/detail/generic/sort.inl @@ -39,7 +39,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -52,7 +52,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -66,7 +66,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -81,7 +81,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -95,7 +95,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(thrust::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -108,7 +108,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(thrust::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -120,7 +120,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -132,7 +132,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE bool is_sorted(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -143,7 +143,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -157,7 +157,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(thrust::execution_policy &exec, ForwardIterator first, 
ForwardIterator last, @@ -181,7 +181,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(thrust::execution_policy &, RandomAccessIterator, RandomAccessIterator, @@ -198,7 +198,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(thrust::execution_policy &, RandomAccessIterator1, RandomAccessIterator1, diff --git a/thrust/system/detail/generic/swap_ranges.h b/thrust/system/detail/generic/swap_ranges.h index edb5acf31..9b27f4d16 100644 --- a/thrust/system/detail/generic/swap_ranges.h +++ b/thrust/system/detail/generic/swap_ranges.h @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator2 swap_ranges(thrust::execution_policy &exec, ForwardIterator1 first1, ForwardIterator1 last1, diff --git a/thrust/system/detail/generic/swap_ranges.inl b/thrust/system/detail/generic/swap_ranges.inl index ea42df35b..a28332e3c 100644 --- a/thrust/system/detail/generic/swap_ranges.inl +++ b/thrust/system/detail/generic/swap_ranges.inl @@ -39,7 +39,7 @@ namespace detail struct swap_pair_elements { template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(Tuple t) { // use unqualified swap to allow ADL to catch any user-defined swap @@ -55,7 +55,7 @@ struct swap_pair_elements template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator2 swap_ranges(thrust::execution_policy &exec, ForwardIterator1 first1, ForwardIterator1 last1, diff --git a/thrust/system/detail/generic/tabulate.h b/thrust/system/detail/generic/tabulate.h index 041093e82..634910f32 100644 --- a/thrust/system/detail/generic/tabulate.h +++ b/thrust/system/detail/generic/tabulate.h @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void tabulate(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/tabulate.inl b/thrust/system/detail/generic/tabulate.inl index 0fd2121c1..d99591354 100644 --- 
a/thrust/system/detail/generic/tabulate.inl +++ b/thrust/system/detail/generic/tabulate.inl @@ -35,7 +35,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void tabulate(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/generic/tag.h b/thrust/system/detail/generic/tag.h index 48f094797..99f27155a 100644 --- a/thrust/system/detail/generic/tag.h +++ b/thrust/system/detail/generic/tag.h @@ -36,7 +36,7 @@ namespace generic struct tag { template - __host__ __device__ inline + THRUST_HOST_DEVICE inline tag(const T &) {} }; diff --git a/thrust/system/detail/generic/temporary_buffer.h b/thrust/system/detail/generic/temporary_buffer.h index 3cd39addb..59236e49e 100644 --- a/thrust/system/detail/generic/temporary_buffer.h +++ b/thrust/system/detail/generic/temporary_buffer.h @@ -31,19 +31,19 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair, typename thrust::pointer::difference_type> get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n); -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(thrust::execution_policy &exec, Pointer p, std::ptrdiff_t n); -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(thrust::execution_policy &exec, Pointer p); diff --git a/thrust/system/detail/generic/temporary_buffer.inl b/thrust/system/detail/generic/temporary_buffer.inl index 6d4329c3d..63bf71819 100644 --- a/thrust/system/detail/generic/temporary_buffer.inl +++ b/thrust/system/detail/generic/temporary_buffer.inl @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair, typename thrust::pointer::difference_type> get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n) { @@ -48,9 
+48,9 @@ __host__ __device__ } // end get_temporary_buffer() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(thrust::execution_policy &exec, Pointer p, std::ptrdiff_t) { // If we are here, no user customization of the three-argument signature with @@ -67,9 +67,9 @@ __host__ __device__ } // end return_temporary_buffer() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void return_temporary_buffer(thrust::execution_policy &exec, Pointer p) { // If we are here, no user customization of either the old two-argument diff --git a/thrust/system/detail/generic/transform.h b/thrust/system/detail/generic/transform.h index 30e032696..e2d1fe600 100644 --- a/thrust/system/detail/generic/transform.h +++ b/thrust/system/detail/generic/transform.h @@ -31,7 +31,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -43,7 +43,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -70,7 +70,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -86,7 +86,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/transform.inl b/thrust/system/detail/generic/transform.inl index 122c42580..18d033585 100644 --- a/thrust/system/detail/generic/transform.inl +++ b/thrust/system/detail/generic/transform.inl @@ -38,7 +38,7 @@ 
template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -66,7 +66,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -96,7 +96,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -126,7 +126,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator1 first, InputIterator1 last, @@ -158,7 +158,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/generic/transform_reduce.h b/thrust/system/detail/generic/transform_reduce.h index af510296e..dcc385e78 100644 --- a/thrust/system/detail/generic/transform_reduce.h +++ b/thrust/system/detail/generic/transform_reduce.h @@ -34,7 +34,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/transform_reduce.inl b/thrust/system/detail/generic/transform_reduce.inl index 539c3b22c..7eb60e00b 100644 --- a/thrust/system/detail/generic/transform_reduce.inl +++ b/thrust/system/detail/generic/transform_reduce.inl @@ -35,7 +35,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/transform_scan.h b/thrust/system/detail/generic/transform_scan.h index 05054c965..0b612b7b9 100644 --- a/thrust/system/detail/generic/transform_scan.h +++ b/thrust/system/detail/generic/transform_scan.h @@ -34,7 +34,7 @@ template -__host__ __device__ 
+THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -48,7 +48,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/transform_scan.inl b/thrust/system/detail/generic/transform_scan.inl index c9c976687..15d1045db 100644 --- a/thrust/system/detail/generic/transform_scan.inl +++ b/thrust/system/detail/generic/transform_scan.inl @@ -40,7 +40,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -66,7 +66,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/generic/uninitialized_copy.h b/thrust/system/detail/generic/uninitialized_copy.h index bac5bcf96..95ab4aa60 100644 --- a/thrust/system/detail/generic/uninitialized_copy.h +++ b/thrust/system/detail/generic/uninitialized_copy.h @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -41,7 +41,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, InputIterator first, Size n, diff --git a/thrust/system/detail/generic/uninitialized_copy.inl b/thrust/system/detail/generic/uninitialized_copy.inl index 679d1f6ba..ef7272792 100644 --- a/thrust/system/detail/generic/uninitialized_copy.inl +++ b/thrust/system/detail/generic/uninitialized_copy.inl @@ -39,7 +39,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(Tuple t) { const InputType &in = thrust::get<0>(t); @@ -54,7 +54,7 @@ template -__host__ 
__device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -90,7 +90,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -106,7 +106,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, InputIterator first, Size n, @@ -138,7 +138,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, InputIterator first, Size n, @@ -155,7 +155,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -173,7 +173,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, InputIterator first, Size n, diff --git a/thrust/system/detail/generic/uninitialized_fill.h b/thrust/system/detail/generic/uninitialized_fill.h index 4f5404508..d07cf508e 100644 --- a/thrust/system/detail/generic/uninitialized_fill.h +++ b/thrust/system/detail/generic/uninitialized_fill.h @@ -31,7 +31,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_fill(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -41,7 +41,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, ForwardIterator first, Size n, diff --git a/thrust/system/detail/generic/uninitialized_fill.inl b/thrust/system/detail/generic/uninitialized_fill.inl index 062414945..b9faf8a65 100644 --- a/thrust/system/detail/generic/uninitialized_fill.inl +++ b/thrust/system/detail/generic/uninitialized_fill.inl @@ -36,7 +36,7 @@ namespace detail template -__host__ __device__ 
+THRUST_HOST_DEVICE void uninitialized_fill(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -49,7 +49,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_fill(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -65,7 +65,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, ForwardIterator first, Size n, @@ -79,7 +79,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, ForwardIterator first, Size n, @@ -96,7 +96,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_fill(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -114,7 +114,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, ForwardIterator first, Size n, diff --git a/thrust/system/detail/generic/unique.h b/thrust/system/detail/generic/unique.h index ce3bff884..15e04696c 100644 --- a/thrust/system/detail/generic/unique.h +++ b/thrust/system/detail/generic/unique.h @@ -30,7 +30,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last); @@ -39,7 +39,7 @@ ForwardIterator unique(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -49,7 +49,7 @@ ForwardIterator unique(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -60,7 +60,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(thrust::execution_policy &exec, InputIterator first, 
InputIterator last, @@ -70,7 +70,7 @@ OutputIterator unique_copy(thrust::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(thrust::execution_policy &exec, ForwardIterator first, @@ -80,7 +80,7 @@ typename thrust::iterator_traits::difference_type template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(thrust::execution_policy &exec, ForwardIterator first, diff --git a/thrust/system/detail/generic/unique.inl b/thrust/system/detail/generic/unique.inl index bb66e3585..64db32afc 100644 --- a/thrust/system/detail/generic/unique.inl +++ b/thrust/system/detail/generic/unique.inl @@ -40,7 +40,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last) @@ -54,7 +54,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(thrust::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -71,7 +71,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -86,7 +86,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(thrust::execution_policy &exec, InputIterator first, InputIterator last, @@ -104,7 +104,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(thrust::execution_policy &exec, ForwardIterator first, @@ -121,7 +121,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(thrust::execution_policy &exec, ForwardIterator first, diff --git a/thrust/system/detail/generic/unique_by_key.h b/thrust/system/detail/generic/unique_by_key.h index 0ea9e7cc8..5ff7405cd 
100644 --- a/thrust/system/detail/generic/unique_by_key.h +++ b/thrust/system/detail/generic/unique_by_key.h @@ -32,7 +32,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(thrust::execution_policy &exec, ForwardIterator1 keys_first, @@ -44,7 +44,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(thrust::execution_policy &exec, ForwardIterator1 keys_first, @@ -58,7 +58,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -74,7 +74,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(thrust::execution_policy &exec, InputIterator1 keys_first, diff --git a/thrust/system/detail/generic/unique_by_key.inl b/thrust/system/detail/generic/unique_by_key.inl index ffcf1dd0c..fa2393d21 100644 --- a/thrust/system/detail/generic/unique_by_key.inl +++ b/thrust/system/detail/generic/unique_by_key.inl @@ -40,7 +40,7 @@ namespace generic template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(thrust::execution_policy &exec, ForwardIterator1 keys_first, @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(thrust::execution_policy &exec, ForwardIterator1 keys_first, @@ -81,7 +81,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(thrust::execution_policy &exec, InputIterator1 keys_first, @@ -101,7 +101,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(thrust::execution_policy &exec, InputIterator1 keys_first, diff --git a/thrust/system/detail/internal/decompose.h b/thrust/system/detail/internal/decompose.h index 58af7c551..1e5f7aa3a 100644 --- a/thrust/system/detail/internal/decompose.h +++ b/thrust/system/detail/internal/decompose.h @@ -32,16 +32,16 @@ namespace internal public: typedef IndexType index_type; - __host__ __device__ + 
THRUST_HOST_DEVICE index_range(index_type begin, index_type end) : m_begin(begin), m_end(end) {} - __host__ __device__ + THRUST_HOST_DEVICE index_type begin(void) const { return m_begin; } - __host__ __device__ + THRUST_HOST_DEVICE index_type end(void) const { return m_end; } - __host__ __device__ + THRUST_HOST_DEVICE index_type size(void) const { return m_end - m_begin; } private: @@ -56,7 +56,7 @@ namespace internal typedef IndexType index_type; typedef index_range range_type; - __host__ __device__ + THRUST_HOST_DEVICE uniform_decomposition(index_type N, index_type granularity, index_type max_intervals) : m_N(N), m_intervals((N + granularity - 1) / granularity), @@ -73,7 +73,7 @@ namespace internal } } - __host__ __device__ + THRUST_HOST_DEVICE index_range operator[](const index_type& i) const { if (i < m_threshold) @@ -90,7 +90,7 @@ namespace internal } } - __host__ __device__ + THRUST_HOST_DEVICE index_type size(void) const { return m_intervals; diff --git a/thrust/system/detail/sequential/adjacent_difference.h b/thrust/system/detail/sequential/adjacent_difference.h index 4a9dad82c..04eeb477b 100644 --- a/thrust/system/detail/sequential/adjacent_difference.h +++ b/thrust/system/detail/sequential/adjacent_difference.h @@ -34,12 +34,12 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator adjacent_difference(sequential::execution_policy &, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/sequential/assign_value.h b/thrust/system/detail/sequential/assign_value.h index 0eb145d13..01d6f6231 100644 --- a/thrust/system/detail/sequential/assign_value.h +++ b/thrust/system/detail/sequential/assign_value.h @@ -29,7 +29,7 @@ namespace sequential { template -__host__ __device__ +THRUST_HOST_DEVICE void assign_value(sequential::execution_policy &, Pointer1 dst, Pointer2 src) { *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); diff --git 
a/thrust/system/detail/sequential/binary_search.h b/thrust/system/detail/sequential/binary_search.h index 2da5080f4..32baf1beb 100644 --- a/thrust/system/detail/sequential/binary_search.h +++ b/thrust/system/detail/sequential/binary_search.h @@ -37,12 +37,12 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator lower_bound(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -82,12 +82,12 @@ ForwardIterator lower_bound(sequential::execution_policy &, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator upper_bound(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -127,12 +127,12 @@ ForwardIterator upper_bound(sequential::execution_policy &, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE bool binary_search(sequential::execution_policy &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/sequential/copy.h b/thrust/system/detail/sequential/copy.h index 0dd2cdad5..a61bc3057 100644 --- a/thrust/system/detail/sequential/copy.h +++ b/thrust/system/detail/sequential/copy.h @@ -35,7 +35,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(sequential::execution_policy &exec, InputIterator first, InputIterator last, @@ -46,7 +46,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(sequential::execution_policy &exec, InputIterator first, Size n, diff --git a/thrust/system/detail/sequential/copy.inl b/thrust/system/detail/sequential/copy.inl index 850f20f1e..e7d380464 100644 --- a/thrust/system/detail/sequential/copy.inl +++ b/thrust/system/detail/sequential/copy.inl @@ -38,7 +38,7 @@ namespace copy_detail // returns the raw pointer associated with a Pointer-like thing template -__host__ 
__device__ +THRUST_HOST_DEVICE typename thrust::detail::pointer_traits::raw_pointer get(Pointer ptr) { @@ -46,10 +46,10 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result, @@ -63,10 +63,10 @@ __host__ __device__ } // end copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result, @@ -76,11 +76,11 @@ __host__ __device__ } // end copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(InputIterator first, Size n, OutputIterator result, @@ -91,11 +91,11 @@ __host__ __device__ } // end copy_n() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(InputIterator first, Size n, OutputIterator result, @@ -108,11 +108,11 @@ __host__ __device__ } // end namespace copy_detail -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy(sequential::execution_policy &, InputIterator first, InputIterator last, @@ -123,12 +123,12 @@ __host__ __device__ } // end copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_n(sequential::execution_policy &, InputIterator first, Size n, diff --git a/thrust/system/detail/sequential/copy_backward.h b/thrust/system/detail/sequential/copy_backward.h index d127ac80d..9cd9bf7f7 100644 --- a/thrust/system/detail/sequential/copy_backward.h +++ b/thrust/system/detail/sequential/copy_backward.h @@ -27,10 +27,10 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE 
BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result) diff --git a/thrust/system/detail/sequential/copy_if.h b/thrust/system/detail/sequential/copy_if.h index 3c00956de..6db185ccb 100644 --- a/thrust/system/detail/sequential/copy_if.h +++ b/thrust/system/detail/sequential/copy_if.h @@ -33,13 +33,13 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator copy_if(sequential::execution_policy &, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/sequential/execution_policy.h b/thrust/system/detail/sequential/execution_policy.h index fed35ff0a..f6909a102 100644 --- a/thrust/system/detail/sequential/execution_policy.h +++ b/thrust/system/detail/sequential/execution_policy.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -50,7 +50,7 @@ template<> // tag's definition comes before the generic definition of execution_policy struct tag : execution_policy { - __host__ __device__ constexpr tag() {} + THRUST_HOST_DEVICE constexpr tag() {} }; // allow conversion to tag when it is not a successor diff --git a/thrust/system/detail/sequential/extrema.h b/thrust/system/detail/sequential/extrema.h index 5e5c62da6..d7d3e8f05 100644 --- a/thrust/system/detail/sequential/extrema.h +++ b/thrust/system/detail/sequential/extrema.h @@ -35,11 +35,11 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator min_element(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -65,11 +65,11 @@ ForwardIterator min_element(sequential::execution_policy &, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator max_element(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -95,11 +95,11 @@ ForwardIterator max_element(sequential::execution_policy &, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair minmax_element(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, diff --git a/thrust/system/detail/sequential/find.h b/thrust/system/detail/sequential/find.h index 54c238c71..f725ab37d 100644 --- a/thrust/system/detail/sequential/find.h +++ b/thrust/system/detail/sequential/find.h @@ -34,11 +34,11 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator find_if(execution_policy &, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/sequential/for_each.h b/thrust/system/detail/sequential/for_each.h index 7058c56f2..f29cee306 100644 --- a/thrust/system/detail/sequential/for_each.h +++ 
b/thrust/system/detail/sequential/for_each.h @@ -34,11 +34,11 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(sequential::execution_policy &, InputIterator first, InputIterator last, @@ -63,7 +63,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(sequential::execution_policy &, InputIterator first, Size n, diff --git a/thrust/system/detail/sequential/general_copy.h b/thrust/system/detail/sequential/general_copy.h index 6ea87bbac..146ba0d39 100644 --- a/thrust/system/detail/sequential/general_copy.h +++ b/thrust/system/detail/sequential/general_copy.h @@ -64,9 +64,9 @@ struct reference_is_assignable // introduce an iterator assign helper to deal with assignments from // a wrapped reference -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::detail::enable_if< reference_is_assignable::value >::type @@ -76,9 +76,9 @@ iter_assign(OutputIterator dst, InputIterator src) } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -inline __host__ __device__ +inline THRUST_HOST_DEVICE typename thrust::detail::disable_if< reference_is_assignable::value >::type @@ -94,10 +94,10 @@ iter_assign(OutputIterator dst, InputIterator src) } // end general_copy_detail -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator general_copy(InputIterator first, InputIterator last, OutputIterator result) @@ -116,11 +116,11 @@ __host__ __device__ } // end general_copy() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator general_copy_n(InputIterator first, Size n, OutputIterator result) diff --git a/thrust/system/detail/sequential/get_value.h b/thrust/system/detail/sequential/get_value.h index 90752d867..dec3a4ee4 100644 --- 
a/thrust/system/detail/sequential/get_value.h +++ b/thrust/system/detail/sequential/get_value.h @@ -30,7 +30,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_value::type get_value(sequential::execution_policy &, Pointer ptr) { diff --git a/thrust/system/detail/sequential/insertion_sort.h b/thrust/system/detail/sequential/insertion_sort.h index 9acccd8e9..0f6d41753 100644 --- a/thrust/system/detail/sequential/insertion_sort.h +++ b/thrust/system/detail/sequential/insertion_sort.h @@ -31,10 +31,10 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void insertion_sort(RandomAccessIterator first, RandomAccessIterator last, StrictWeakOrdering comp) @@ -79,11 +79,11 @@ void insertion_sort(RandomAccessIterator first, } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void insertion_sort_by_key(RandomAccessIterator1 first1, RandomAccessIterator1 last1, RandomAccessIterator2 first2, diff --git a/thrust/system/detail/sequential/iter_swap.h b/thrust/system/detail/sequential/iter_swap.h index 7a5c481fc..b9d176691 100644 --- a/thrust/system/detail/sequential/iter_swap.h +++ b/thrust/system/detail/sequential/iter_swap.h @@ -31,7 +31,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE void iter_swap(sequential::execution_policy &, Pointer1 a, Pointer2 b) { using thrust::swap; diff --git a/thrust/system/detail/sequential/malloc_and_free.h b/thrust/system/detail/sequential/malloc_and_free.h index 14f5a3b91..19944e36e 100644 --- a/thrust/system/detail/sequential/malloc_and_free.h +++ b/thrust/system/detail/sequential/malloc_and_free.h @@ -31,7 +31,7 @@ namespace sequential template -inline __host__ __device__ +inline THRUST_HOST_DEVICE void *malloc(execution_policy &, std::size_t n) { return std::malloc(n); @@ -39,7 +39,7 @@ void *malloc(execution_policy &, 
std::size_t n) template -inline __host__ __device__ +inline THRUST_HOST_DEVICE void free(sequential::execution_policy &, Pointer ptr) { std::free(thrust::raw_pointer_cast(ptr)); diff --git a/thrust/system/detail/sequential/merge.h b/thrust/system/detail/sequential/merge.h index a45e18004..a513a327d 100644 --- a/thrust/system/detail/sequential/merge.h +++ b/thrust/system/detail/sequential/merge.h @@ -38,7 +38,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(sequential::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -56,7 +56,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(sequential::execution_policy &exec, InputIterator1 keys_first1, diff --git a/thrust/system/detail/sequential/merge.inl b/thrust/system/detail/sequential/merge.inl index 08d7c0b0d..b8d7d27af 100644 --- a/thrust/system/detail/sequential/merge.inl +++ b/thrust/system/detail/sequential/merge.inl @@ -31,13 +31,13 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator merge(sequential::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -72,7 +72,7 @@ OutputIterator merge(sequential::execution_policy &exec, } // end merge() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair merge_by_key(sequential::execution_policy &, InputIterator1 keys_first1, diff --git a/thrust/system/detail/sequential/partition.h b/thrust/system/detail/sequential/partition.h index f5d69c0e7..451dd8fe7 100644 --- a/thrust/system/detail/sequential/partition.h +++ b/thrust/system/detail/sequential/partition.h @@ -46,10 +46,10 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE void iter_swap(ForwardIterator1 iter1, ForwardIterator2 iter2) { // XXX this isn't correct because it doesn't use 
thrust::swap @@ -63,11 +63,11 @@ void iter_swap(ForwardIterator1 iter1, ForwardIterator2 iter2) } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -104,12 +104,12 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -155,11 +155,11 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(sequential::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -207,12 +207,12 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(sequential::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -258,13 +258,13 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(sequential::execution_policy &, InputIterator first, @@ -297,14 +297,14 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair stable_partition_copy(sequential::execution_policy &, InputIterator1 first, diff --git a/thrust/system/detail/sequential/reduce.h b/thrust/system/detail/sequential/reduce.h index a532f71b2..1c47599c0 100644 --- a/thrust/system/detail/sequential/reduce.h +++ b/thrust/system/detail/sequential/reduce.h @@ -34,12 +34,12 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputType reduce(sequential::execution_policy 
&, InputIterator begin, InputIterator end, diff --git a/thrust/system/detail/sequential/reduce_by_key.h b/thrust/system/detail/sequential/reduce_by_key.h index b30eddc3f..90cf48061 100644 --- a/thrust/system/detail/sequential/reduce_by_key.h +++ b/thrust/system/detail/sequential/reduce_by_key.h @@ -30,7 +30,7 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair reduce_by_key(sequential::execution_policy &, InputIterator1 keys_first, diff --git a/thrust/system/detail/sequential/remove.h b/thrust/system/detail/sequential/remove.h index df564f15b..6e3dcc450 100644 --- a/thrust/system/detail/sequential/remove.h +++ b/thrust/system/detail/sequential/remove.h @@ -34,11 +34,11 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -76,12 +76,12 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(sequential::execution_policy &, ForwardIterator first, ForwardIterator last, @@ -125,12 +125,12 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(sequential::execution_policy &, InputIterator first, InputIterator last, @@ -158,13 +158,13 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(sequential::execution_policy &, InputIterator1 first, InputIterator1 last, diff --git a/thrust/system/detail/sequential/scan.h b/thrust/system/detail/sequential/scan.h index 0ca332b4b..6edfc5222 100644 --- a/thrust/system/detail/sequential/scan.h +++ b/thrust/system/detail/sequential/scan.h @@ 
-38,12 +38,12 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan(sequential::execution_policy &, InputIterator first, InputIterator last, @@ -75,13 +75,13 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan(sequential::execution_policy &, InputIterator first, InputIterator last, diff --git a/thrust/system/detail/sequential/scan_by_key.h b/thrust/system/detail/sequential/scan_by_key.h index c428c1050..9e29b4825 100644 --- a/thrust/system/detail/sequential/scan_by_key.h +++ b/thrust/system/detail/sequential/scan_by_key.h @@ -35,14 +35,14 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator inclusive_scan_by_key(sequential::execution_policy &, InputIterator1 first1, InputIterator1 last1, @@ -86,7 +86,7 @@ __host__ __device__ } -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator exclusive_scan_by_key(sequential::execution_policy &, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/sequential/set_operations.h b/thrust/system/detail/sequential/set_operations.h index 678754b45..2575e13f2 100644 --- a/thrust/system/detail/sequential/set_operations.h +++ b/thrust/system/detail/sequential/set_operations.h @@ -35,13 +35,13 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_difference(sequential::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -79,13 +79,13 @@ __host__ __device__ } // end set_difference() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator 
set_intersection(sequential::execution_policy &, InputIterator1 first1, InputIterator1 last1, @@ -123,13 +123,13 @@ __host__ __device__ } // end set_intersection() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_symmetric_difference(sequential::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, @@ -169,13 +169,13 @@ __host__ __device__ } // end set_symmetric_difference() -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator set_union(sequential::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, diff --git a/thrust/system/detail/sequential/sort.h b/thrust/system/detail/sequential/sort.h index 34cc7a8ba..ff738c04f 100644 --- a/thrust/system/detail/sequential/sort.h +++ b/thrust/system/detail/sequential/sort.h @@ -35,7 +35,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -46,7 +46,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, diff --git a/thrust/system/detail/sequential/sort.inl b/thrust/system/detail/sequential/sort.inl index 4a298d876..5ed914a94 100644 --- a/thrust/system/detail/sequential/sort.inl +++ b/thrust/system/detail/sequential/sort.inl @@ -1,6 +1,6 @@ /* * Copyright 2008-2021 NVIDIA Corporation - * Modifications Copyright© 2019-2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -55,7 +55,7 @@ struct needs_reverse template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -78,7 +78,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, @@ -114,7 +114,7 @@ void stable_sort_by_key(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -129,7 +129,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, @@ -159,7 +159,7 @@ struct use_primitive_sort template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -182,7 +182,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, diff --git a/thrust/system/detail/sequential/stable_merge_sort.h b/thrust/system/detail/sequential/stable_merge_sort.h index 64aa2bf96..0df7b0d96 100644 --- a/thrust/system/detail/sequential/stable_merge_sort.h +++ b/thrust/system/detail/sequential/stable_merge_sort.h @@ -31,7 +31,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE void stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator begin, RandomAccessIterator end, @@ -42,7 +42,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_begin, RandomAccessIterator1 keys_end, diff --git a/thrust/system/detail/sequential/stable_merge_sort.inl b/thrust/system/detail/sequential/stable_merge_sort.inl 
index e6fd2daa2..30d91072b 100644 --- a/thrust/system/detail/sequential/stable_merge_sort.inl +++ b/thrust/system/detail/sequential/stable_merge_sort.inl @@ -1,6 +1,6 @@ /* * Copyright 2008-2021 NVIDIA Corporation - * Modifications Copyright© 2019-2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,7 +41,7 @@ namespace stable_merge_sort_detail template -__host__ __device__ +THRUST_HOST_DEVICE void inplace_merge(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator middle, @@ -61,7 +61,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void inplace_merge_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 middle1, @@ -92,7 +92,7 @@ void inplace_merge_by_key(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void insertion_sort_each(RandomAccessIterator first, RandomAccessIterator last, Size partition_size, @@ -114,7 +114,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void insertion_sort_each_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, RandomAccessIterator2 values_first, @@ -138,7 +138,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void merge_adjacent_partitions(sequential::execution_policy &exec, RandomAccessIterator1 first, RandomAccessIterator1 last, @@ -167,7 +167,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void merge_adjacent_partitions_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -203,7 +203,7 @@ void merge_adjacent_partitions_by_key(sequential::execution_policy -__host__ __device__ +THRUST_HOST_DEVICE void iterative_stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator first, 
RandomAccessIterator last, @@ -258,7 +258,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void iterative_stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, @@ -317,7 +317,7 @@ void iterative_stable_merge_sort_by_key(sequential::execution_policy -__host__ __device__ +THRUST_HOST_DEVICE void recursive_stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -342,7 +342,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void recursive_stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, @@ -371,7 +371,7 @@ void recursive_stable_merge_sort_by_key(sequential::execution_policy -__host__ __device__ +THRUST_HOST_DEVICE void stable_merge_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last, @@ -390,7 +390,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_merge_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, diff --git a/thrust/system/detail/sequential/stable_primitive_sort.h b/thrust/system/detail/sequential/stable_primitive_sort.h index acbb81217..506f8fe4f 100644 --- a/thrust/system/detail/sequential/stable_primitive_sort.h +++ b/thrust/system/detail/sequential/stable_primitive_sort.h @@ -30,7 +30,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE void stable_primitive_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last); @@ -39,7 +39,7 @@ void stable_primitive_sort(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_primitive_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, diff --git a/thrust/system/detail/sequential/stable_primitive_sort.inl 
b/thrust/system/detail/sequential/stable_primitive_sort.inl index 9897d6798..120b75c1e 100644 --- a/thrust/system/detail/sequential/stable_primitive_sort.inl +++ b/thrust/system/detail/sequential/stable_primitive_sort.inl @@ -61,7 +61,7 @@ template template typename enable_if_bool_sort::type -__host__ __device__ +THRUST_HOST_DEVICE stable_primitive_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) { @@ -74,7 +74,7 @@ __host__ __device__ template typename disable_if_bool_sort::type -__host__ __device__ +THRUST_HOST_DEVICE stable_primitive_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) { @@ -86,7 +86,7 @@ __host__ __device__ struct logical_not_first { template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(Tuple t) { return !thrust::get<0>(t); @@ -98,7 +98,7 @@ template typename enable_if_bool_sort::type -__host__ __device__ +THRUST_HOST_DEVICE stable_primitive_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, RandomAccessIterator2 values_first) @@ -116,7 +116,7 @@ template typename disable_if_bool_sort::type -__host__ __device__ +THRUST_HOST_DEVICE stable_primitive_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, RandomAccessIterator2 values_first) @@ -131,7 +131,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE void stable_primitive_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -143,7 +143,7 @@ void stable_primitive_sort(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_primitive_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, diff --git a/thrust/system/detail/sequential/stable_radix_sort.h 
b/thrust/system/detail/sequential/stable_radix_sort.h index 1e9713a2c..443c2013e 100644 --- a/thrust/system/detail/sequential/stable_radix_sort.h +++ b/thrust/system/detail/sequential/stable_radix_sort.h @@ -30,7 +30,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE void stable_radix_sort(sequential::execution_policy &exec, RandomAccessIterator begin, RandomAccessIterator end); @@ -39,7 +39,7 @@ void stable_radix_sort(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_radix_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 keys_begin, RandomAccessIterator1 keys_end, diff --git a/thrust/system/detail/sequential/stable_radix_sort.inl b/thrust/system/detail/sequential/stable_radix_sort.inl index 234dccee4..45ba31ea3 100644 --- a/thrust/system/detail/sequential/stable_radix_sort.inl +++ b/thrust/system/detail/sequential/stable_radix_sort.inl @@ -48,7 +48,7 @@ struct RadixEncoder : public thrust::identity template <> struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned char operator()(char x) const { if(std::numeric_limits::is_signed) @@ -65,7 +65,7 @@ struct RadixEncoder : public thrust::unary_function template <> struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned char operator()(signed char x) const { return static_cast(x) ^ static_cast(1) << (8 * sizeof(unsigned char) - 1); @@ -75,7 +75,7 @@ struct RadixEncoder : public thrust::unary_function struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned short operator()(short x) const { return static_cast(x) ^ static_cast(1) << (8 * sizeof(unsigned short) - 1); @@ -85,7 +85,7 @@ struct RadixEncoder : public thrust::unary_function struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned long operator()(long x) const { return x ^ 
static_cast(1) << (8 * sizeof(unsigned int) - 1); @@ -95,7 +95,7 @@ struct RadixEncoder : public thrust::unary_function template <> struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned long operator()(long x) const { return x ^ static_cast(1) << (8 * sizeof(unsigned long) - 1); @@ -105,7 +105,7 @@ struct RadixEncoder : public thrust::unary_function template <> struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE unsigned long long operator()(long long x) const { return x ^ static_cast(1) << (8 * sizeof(unsigned long long) - 1); @@ -116,7 +116,7 @@ struct RadixEncoder : public thrust::unary_function struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE thrust::detail::uint32_t operator()(float x) const { union { float f; thrust::detail::uint32_t i; } u; @@ -129,7 +129,7 @@ struct RadixEncoder : public thrust::unary_function struct RadixEncoder : public thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE thrust::detail::uint64_t operator()(double x) const { union { double f; thrust::detail::uint64_t i; } u; @@ -153,14 +153,14 @@ template EncodedType bit_shift; size_t *histogram; - __host__ __device__ + THRUST_HOST_DEVICE bucket_functor(EncodedType bit_shift, size_t *histogram) : encode(), bit_shift(bit_shift), histogram(histogram) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE size_t operator()(KeyType key) { const EncodedType x = encode(key); @@ -176,7 +176,7 @@ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE void radix_shuffle_n(sequential::execution_policy &exec, RandomAccessIterator1 first, const size_t n, @@ -201,7 +201,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void radix_shuffle_n(sequential::execution_policy &exec, RandomAccessIterator1 keys_first, RandomAccessIterator2 values_first, @@ -229,7 +229,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void 
radix_sort(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, @@ -348,7 +348,7 @@ struct radix_sort_dispatcher<1> template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) @@ -361,7 +361,7 @@ struct radix_sort_dispatcher<1> typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4> - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, @@ -378,7 +378,7 @@ struct radix_sort_dispatcher<2> template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) @@ -405,7 +405,7 @@ struct radix_sort_dispatcher<2> typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4> - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, @@ -435,7 +435,7 @@ struct radix_sort_dispatcher<4> template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) @@ -455,7 +455,7 @@ struct radix_sort_dispatcher<4> typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4> - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, @@ -479,7 +479,7 @@ struct radix_sort_dispatcher<8> template - __host__ __device__ + THRUST_HOST_DEVICE void 
operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) @@ -499,7 +499,7 @@ struct radix_sort_dispatcher<8> typename RandomAccessIterator2, typename RandomAccessIterator3, typename RandomAccessIterator4> - __host__ __device__ + THRUST_HOST_DEVICE void operator()(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, @@ -520,7 +520,7 @@ struct radix_sort_dispatcher<8> template -__host__ __device__ +THRUST_HOST_DEVICE void radix_sort(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, @@ -536,7 +536,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void radix_sort(sequential::execution_policy &exec, RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, @@ -554,7 +554,7 @@ void radix_sort(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_radix_sort(sequential::execution_policy &exec, RandomAccessIterator first, RandomAccessIterator last) @@ -572,7 +572,7 @@ void stable_radix_sort(sequential::execution_policy &exec, template -__host__ __device__ +THRUST_HOST_DEVICE void stable_radix_sort_by_key(sequential::execution_policy &exec, RandomAccessIterator1 first1, RandomAccessIterator1 last1, diff --git a/thrust/system/detail/sequential/trivial_copy.h b/thrust/system/detail/sequential/trivial_copy.h index 48946ec08..5439afb99 100644 --- a/thrust/system/detail/sequential/trivial_copy.h +++ b/thrust/system/detail/sequential/trivial_copy.h @@ -37,7 +37,7 @@ namespace sequential template -__host__ __device__ +THRUST_HOST_DEVICE T *trivial_copy_n(const T *first, std::ptrdiff_t n, T *result) diff --git a/thrust/system/detail/sequential/unique.h b/thrust/system/detail/sequential/unique.h index c4fe5268a..36c5ad83c 100644 --- a/thrust/system/detail/sequential/unique.h +++ b/thrust/system/detail/sequential/unique.h @@ -35,12 
+35,12 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(sequential::execution_policy &, InputIterator first, InputIterator last, @@ -78,7 +78,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(sequential::execution_policy &exec, ForwardIterator first, ForwardIterator last, @@ -92,7 +92,7 @@ __host__ __device__ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(sequential::execution_policy &, ForwardIterator first, diff --git a/thrust/system/detail/sequential/unique_by_key.h b/thrust/system/detail/sequential/unique_by_key.h index d30cc7c71..43e4da8e7 100644 --- a/thrust/system/detail/sequential/unique_by_key.h +++ b/thrust/system/detail/sequential/unique_by_key.h @@ -35,14 +35,14 @@ namespace sequential { -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(sequential::execution_policy &, InputIterator1 keys_first, @@ -95,7 +95,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(sequential::execution_policy &exec, ForwardIterator1 keys_first, diff --git a/thrust/system/hip/config.h b/thrust/system/hip/config.h index 6679ea6d8..359810f0c 100644 --- a/thrust/system/hip/config.h +++ b/thrust/system/hip/config.h @@ -32,16 +32,16 @@ #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP #ifndef __HIP_DEVICE_COMPILE__ #define __THRUST_HAS_HIPRT__ 1 - #define THRUST_HIP_RUNTIME_FUNCTION __host__ __device__ __forceinline__ + #define THRUST_HIP_RUNTIME_FUNCTION THRUST_HOST_DEVICE THRUST_FORCEINLINE #define THRUST_RUNTIME_FUNCTION THRUST_HIP_RUNTIME_FUNCTION #else #define __THRUST_HAS_HIPRT__ 0 - #define THRUST_HIP_RUNTIME_FUNCTION __host__ __forceinline__ + #define THRUST_HIP_RUNTIME_FUNCTION THRUST_HOST THRUST_FORCEINLINE #define 
THRUST_RUNTIME_FUNCTION THRUST_HIP_RUNTIME_FUNCTION #endif #else #define __THRUST_HAS_HIPRT__ 0 - #define THRUST_HIP_RUNTIME_FUNCTION __host__ __forceinline__ + #define THRUST_HIP_RUNTIME_FUNCTION THRUST_HOST THRUST_FORCEINLINE #define THRUST_RUNTIME_FUNCTION THRUST_HIP_RUNTIME_FUNCTION #endif @@ -59,9 +59,9 @@ #define THRUST_HIP_DEVICE_CODE #endif -#define THRUST_HIP_DEVICE_FUNCTION __device__ __forceinline__ -#define THRUST_HIP_HOST_FUNCTION __host__ __forceinline__ -#define THRUST_HIP_FUNCTION __host__ __device__ __forceinline__ +#define THRUST_HIP_DEVICE_FUNCTION THRUST_DEVICE THRUST_FORCEINLINE +#define THRUST_HIP_HOST_FUNCTION THRUST_HOST THRUST_FORCEINLINE +#define THRUST_HIP_FUNCTION THRUST_HOST_DEVICE THRUST_FORCEINLINE #ifdef THRUST_HIP_DEBUG_SYNC #define THRUST_HIP_DEBUG_SYNC_FLAG true diff --git a/thrust/system/hip/detail/adjacent_difference.h b/thrust/system/hip/detail/adjacent_difference.h index 5df606792..77aa24e01 100644 --- a/thrust/system/hip/detail/adjacent_difference.h +++ b/thrust/system/hip/detail/adjacent_difference.h @@ -51,7 +51,7 @@ template -__host__ __device__ OutputIterator +THRUST_HOST_DEVICE OutputIterator adjacent_difference(const thrust::detail::execution_policy_base& exec, InputIterator first, InputIterator last, @@ -204,7 +204,7 @@ adjacent_difference(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static void par(execution_policy& policy, InputIt first, InputIt last, @@ -214,7 +214,7 @@ adjacent_difference(execution_policy& policy, result = __adjacent_difference::adjacent_difference( policy, first, last, result, binary_op); } - __device__ + THRUST_DEVICE static void seq(execution_policy& policy, InputIt first, InputIt last, diff --git a/thrust/system/hip/detail/async/for_each.h b/thrust/system/hip/detail/async/for_each.h index b93f706db..913560cd4 100644 --- a/thrust/system/hip/detail/async/for_each.h +++ b/thrust/system/hip/detail/async/for_each.h @@ -55,13 
+55,13 @@ struct async_for_each_fn ForwardIt first; UnaryFunction f; - __host__ __device__ + THRUST_HOST_DEVICE async_for_each_fn(ForwardIt&& first_, UnaryFunction&& f_) : first(std::move(first_)), f(std::move(f_)) {} template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(Index idx) { f(thrust::raw_reference_cast(first[idx])); diff --git a/thrust/system/hip/detail/async/transform.h b/thrust/system/hip/detail/async/transform.h index 81a3fa439..d0ee6b5ca 100644 --- a/thrust/system/hip/detail/async/transform.h +++ b/thrust/system/hip/detail/async/transform.h @@ -56,13 +56,13 @@ struct async_transform_fn OutputIt output_; UnaryOperation op_; - __host__ __device__ + THRUST_HOST_DEVICE async_transform_fn(ForwardIt&& first, OutputIt&& output, UnaryOperation&& op) : first_(std::move(first)), output_(std::move(output)), op_(std::move(op)) {} template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(Index idx) { output_[idx] = op_(thrust::raw_reference_cast(first_[idx])); diff --git a/thrust/system/hip/detail/binary_search.h b/thrust/system/hip/detail/binary_search.h index 2ce4d88af..bac2dc075 100644 --- a/thrust/system/hip/detail/binary_search.h +++ b/thrust/system/hip/detail/binary_search.h @@ -251,7 +251,7 @@ lower_bound(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -263,7 +263,7 @@ lower_bound(execution_policy& policy, policy, first, last, values_first, values_last, result, compare_op); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -318,7 +318,7 @@ upper_bound(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt 
par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -330,7 +330,7 @@ upper_bound(execution_policy& policy, policy, first, last, values_first, values_last, result, compare_op); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -383,7 +383,7 @@ binary_search(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -395,7 +395,7 @@ binary_search(execution_policy& policy, policy, first, last, values_first, values_last, result, compare_op); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, HaystackIt first, HaystackIt last, NeedlesIt values_first, @@ -456,7 +456,7 @@ HaystackIt lower_bound(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static HaystackIt par(execution_policy& policy, HaystackIt first, HaystackIt last, @@ -500,7 +500,7 @@ HaystackIt lower_bound(execution_policy& policy, return first + h_result; } - __device__ + THRUST_DEVICE static HaystackIt seq(execution_policy& policy, HaystackIt first, HaystackIt last, @@ -542,7 +542,7 @@ HaystackIt upper_bound(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static HaystackIt par(execution_policy& policy, HaystackIt first, HaystackIt last, @@ -587,7 +587,7 @@ HaystackIt upper_bound(execution_policy& policy, return first + h_result; } - __device__ + THRUST_DEVICE static HaystackIt seq(execution_policy& policy, HaystackIt first, HaystackIt last, @@ -627,7 +627,7 @@ 
bool binary_search(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static bool par(execution_policy& policy, HaystackIt first, HaystackIt last, @@ -671,7 +671,7 @@ bool binary_search(execution_policy& policy, return h_result != 0; } - __device__ + THRUST_DEVICE static bool seq(execution_policy& policy, HaystackIt first, HaystackIt last, diff --git a/thrust/system/hip/detail/copy.h b/thrust/system/hip/detail/copy.h index 494310cbe..af45277a8 100644 --- a/thrust/system/hip/detail/copy.h +++ b/thrust/system/hip/detail/copy.h @@ -36,14 +36,14 @@ THRUST_NAMESPACE_BEGIN template -__host__ __device__ +THRUST_HOST_DEVICE OutputIt copy(const thrust::detail::execution_policy_base& exec, InputIt first, InputIt last, OutputIt result); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIt copy_n(const thrust::detail::execution_policy_base& exec, InputIt first, Size n, @@ -91,7 +91,7 @@ namespace hip_rocprim #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP // D->D copy requires HIP compiler -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template OutputIterator THRUST_HIP_FUNCTION copy(execution_policy& system, InputIterator first, @@ -101,14 +101,14 @@ copy(execution_policy& system, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIterator par(execution_policy& system, + THRUST_HOST static OutputIterator par(execution_policy& system, InputIterator first, InputIterator last, OutputIterator result) { return __copy::device_to_device(system, first, last, result); } - __device__ static OutputIterator seq(execution_policy& system, + THRUST_DEVICE static OutputIterator seq(execution_policy& system, InputIterator first, InputIterator last, OutputIterator result) @@ -124,7 +124,7 @@ copy(execution_policy& system, #endif } // end copy() -__thrust_exec_check_disable__ template @@ -137,12 +137,12 @@ copy_n(execution_policy& system, // struct 
workaround is required for HIP-clang struct workaround { - __host__ static OutputIterator + THRUST_HOST static OutputIterator par(execution_policy& system, InputIterator first, Size n, OutputIterator result) { return __copy::device_to_device(system, first, first + n, result); } - __device__ static OutputIterator + THRUST_DEVICE static OutputIterator seq(execution_policy& system, InputIterator first, Size n, OutputIterator result) { return thrust::copy_n(cvt_to_seq(derived_cast(system)), first, n, result); diff --git a/thrust/system/hip/detail/copy_if.h b/thrust/system/hip/detail/copy_if.h index c89e88fb9..f88be2c61 100644 --- a/thrust/system/hip/detail/copy_if.h +++ b/thrust/system/hip/detail/copy_if.h @@ -52,7 +52,7 @@ template -OutputIterator __host__ __device__ +OutputIterator THRUST_HOST_DEVICE copy_if(const thrust::detail::execution_policy_base& exec, InputIterator first, InputIterator last, @@ -64,7 +64,7 @@ template -OutputIterator __host__ __device__ +OutputIterator THRUST_HOST_DEVICE copy_if(const thrust::detail::execution_policy_base& exec, InputIterator1 first, InputIterator1 last, @@ -228,7 +228,7 @@ copy_if(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIterator par(execution_policy& policy, + THRUST_HOST static OutputIterator par(execution_policy& policy, InputIterator first, InputIterator last, OutputIterator result, @@ -236,7 +236,7 @@ copy_if(execution_policy& policy, { return __copy_if::copy_if(policy, first, last, result, pred); } - __device__ static OutputIterator seq(execution_policy& policy, + THRUST_DEVICE static OutputIterator seq(execution_policy& policy, InputIterator first, InputIterator last, OutputIterator result, @@ -273,7 +273,7 @@ copy_if(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIterator par(execution_policy& policy, + THRUST_HOST static OutputIterator par(execution_policy& policy, 
InputIterator first, InputIterator last, StencilIterator stencil, @@ -282,7 +282,7 @@ copy_if(execution_policy& policy, { return __copy_if::copy_if(policy, first, last, stencil, result, pred); } - __device__ static OutputIterator seq(execution_policy& policy, + THRUST_DEVICE static OutputIterator seq(execution_policy& policy, InputIterator first, InputIterator last, StencilIterator stencil, diff --git a/thrust/system/hip/detail/cross_system.h b/thrust/system/hip/detail/cross_system.h index 9bcaec337..f6f58b3f1 100644 --- a/thrust/system/hip/detail/cross_system.h +++ b/thrust/system/hip/detail/cross_system.h @@ -46,10 +46,10 @@ struct cross_system : execution_policy > policy1 &sys1; policy2 &sys2; - inline __host__ __device__ + inline THRUST_HOST_DEVICE cross_system(policy1 &sys1, policy2 &sys2) : sys1(sys1), sys2(sys2) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE cross_system rotate() const { return cross_system(sys2, sys1); @@ -58,7 +58,7 @@ struct cross_system : execution_policy > // Device to host. template - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto direction_of_copy( thrust::system::hip::execution_policy const& , thrust::cpp::execution_policy const& @@ -71,7 +71,7 @@ struct cross_system : execution_policy > // Host to device. template - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto direction_of_copy( thrust::cpp::execution_policy const& , thrust::system::hip::execution_policy const& @@ -84,7 +84,7 @@ struct cross_system : execution_policy > // Device to device. template - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto direction_of_copy( thrust::system::hip::execution_policy const& , thrust::system::hip::execution_policy const& @@ -97,7 +97,7 @@ struct cross_system : execution_policy > // Device to device. 
template - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto direction_of_copy(execution_policy const &) THRUST_DECLTYPE_RETURNS( thrust::detail::integral_constant< @@ -106,7 +106,7 @@ struct cross_system : execution_policy > ) template - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto direction_of_copy( execution_policy> const &systems ) @@ -123,7 +123,7 @@ struct cross_system : execution_policy > typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_device_to_host_copy( ExecutionPolicy0 const& , ExecutionPolicy1 const& @@ -140,7 +140,7 @@ struct cross_system : execution_policy > // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_device_to_host_copy(ExecutionPolicy const& ) noexcept -> thrust::detail::integral_constant< @@ -156,7 +156,7 @@ struct cross_system : execution_policy > typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_host_to_device_copy( ExecutionPolicy0 const& , ExecutionPolicy1 const& @@ -173,7 +173,7 @@ struct cross_system : execution_policy > // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_host_to_device_copy(ExecutionPolicy const& ) noexcept -> thrust::detail::integral_constant< @@ -189,7 +189,7 @@ struct cross_system : execution_policy > typename Direction = decltype(direction_of_copy(std::declval(), std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_device_to_device_copy( ExecutionPolicy0 const& , ExecutionPolicy1 const& @@ -206,7 +206,7 @@ struct cross_system : 
execution_policy > // MSVC2015 WAR: put decltype here instead of in trailing return type typename Direction = decltype(direction_of_copy(std::declval()))> - constexpr __host__ __device__ + constexpr THRUST_HOST_DEVICE auto is_device_to_device_copy(ExecutionPolicy const& ) noexcept -> thrust::detail::integral_constant< @@ -220,37 +220,37 @@ struct cross_system : execution_policy > // Device to host. template - __host__ __device__ auto select_device_system(thrust::hip::execution_policy& sys1, + THRUST_HOST_DEVICE auto select_device_system(thrust::hip::execution_policy& sys1, thrust::execution_policy&) THRUST_DECLTYPE_RETURNS(sys1) // Device to host. template - __host__ __device__ auto select_device_system(thrust::hip::execution_policy const& sys1, + THRUST_HOST_DEVICE auto select_device_system(thrust::hip::execution_policy const& sys1, thrust::execution_policy const&) THRUST_DECLTYPE_RETURNS(sys1) // Host to device. template - __host__ __device__ auto select_device_system(thrust::execution_policy&, + THRUST_HOST_DEVICE auto select_device_system(thrust::execution_policy&, thrust::hip::execution_policy& sys2) THRUST_DECLTYPE_RETURNS(sys2) // Host to device. template - __host__ __device__ auto select_device_system(thrust::execution_policy const&, + THRUST_HOST_DEVICE auto select_device_system(thrust::execution_policy const&, thrust::hip::execution_policy const& sys2) THRUST_DECLTYPE_RETURNS(sys2) // Device to device. template - __host__ __device__ auto select_device_system(thrust::hip::execution_policy& sys1, + THRUST_HOST_DEVICE auto select_device_system(thrust::hip::execution_policy& sys1, thrust::hip::execution_policy&) THRUST_DECLTYPE_RETURNS(sys1) // Device to device. 
template - __host__ __device__ auto select_device_system(thrust::hip::execution_policy const& sys1, + THRUST_HOST_DEVICE auto select_device_system(thrust::hip::execution_policy const& sys1, thrust::hip::execution_policy const&) THRUST_DECLTYPE_RETURNS(sys1) @@ -258,43 +258,43 @@ struct cross_system : execution_policy > // Device to host. template - __host__ __device__ auto select_host_system(thrust::hip::execution_policy&, + THRUST_HOST_DEVICE auto select_host_system(thrust::hip::execution_policy&, thrust::execution_policy& sys2) THRUST_DECLTYPE_RETURNS(sys2) // Device to host. template - __host__ __device__ auto select_host_system(thrust::hip::execution_policy const&, + THRUST_HOST_DEVICE auto select_host_system(thrust::hip::execution_policy const&, thrust::execution_policy const& sys2) THRUST_DECLTYPE_RETURNS(sys2) // Host to device. template - __host__ __device__ auto select_host_system(thrust::execution_policy& sys1, + THRUST_HOST_DEVICE auto select_host_system(thrust::execution_policy& sys1, thrust::hip::execution_policy&) THRUST_DECLTYPE_RETURNS(sys1) // Host to device. template - __host__ __device__ auto select_host_system(thrust::execution_policy const& sys1, + THRUST_HOST_DEVICE auto select_host_system(thrust::execution_policy const& sys1, thrust::hip::execution_policy const&) THRUST_DECLTYPE_RETURNS(sys1) // Device to device. template - __host__ __device__ + THRUST_HOST_DEVICE auto select_host_system(thrust::execution_policy& sys1, thrust::execution_policy&) THRUST_DECLTYPE_RETURNS(sys1) // Device to device. template - __host__ __device__ auto select_host_system(thrust::execution_policy const& sys1, + THRUST_HOST_DEVICE auto select_host_system(thrust::execution_policy const& sys1, thrust::execution_policy const&) THRUST_DECLTYPE_RETURNS(sys1) // Device to host. 
template - __host__ __device__ cross_system select_system( + THRUST_HOST_DEVICE cross_system select_system( execution_policy const& sys1, thrust::cpp::execution_policy const& sys2) { thrust::execution_policy & non_const_sys1 = const_cast &>(sys1); @@ -304,7 +304,7 @@ struct cross_system : execution_policy > // Host to device. template - __host__ __device__ + THRUST_HOST_DEVICE cross_system select_system(thrust::cpp::execution_policy const &sys1, execution_policy const & sys2) diff --git a/thrust/system/hip/detail/extrema.h b/thrust/system/hip/detail/extrema.h index 1acc27974..8d5a87bd2 100644 --- a/thrust/system/hip/detail/extrema.h +++ b/thrust/system/hip/detail/extrema.h @@ -255,7 +255,7 @@ namespace __extrema } // namespace __extrema /// min element -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template ItemsIt THRUST_HIP_FUNCTION min_element(execution_policy& policy, ItemsIt first, @@ -286,7 +286,7 @@ min_element(execution_policy& policy, } /// max element -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template ItemsIt THRUST_HIP_FUNCTION max_element(execution_policy& policy, ItemsIt first, @@ -317,7 +317,7 @@ max_element(execution_policy& policy, } /// minmax element -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template pair THRUST_HIP_FUNCTION minmax_element(execution_policy& policy, ItemsIt first, diff --git a/thrust/system/hip/detail/fill.h b/thrust/system/hip/detail/fill.h index 6fbc967c0..1d7de06c3 100644 --- a/thrust/system/hip/detail/fill.h +++ b/thrust/system/hip/detail/fill.h @@ -51,7 +51,7 @@ fill_n(execution_policy& policy, thrust::make_counting_iterator(0), thrust::make_counting_iterator(count), first, - [value] __host__ __device__ (Size) { return value; }); + [value] THRUST_HOST_DEVICE (Size) { return value; }); } // func fill_n template diff --git a/thrust/system/hip/detail/future.inl b/thrust/system/hip/detail/future.inl index 8a4b95d40..ba3e8398b 100644 --- 
a/thrust/system/hip/detail/future.inl +++ b/thrust/system/hip/detail/future.inl @@ -46,7 +46,7 @@ THRUST_INLINE_CONSTANT nonowning_t nonowning{}; struct marker_deleter final { - __host__ + THRUST_HOST void operator()(hipEvent_t e) const { if (nullptr != e) @@ -66,7 +66,7 @@ private: public: /// \brief Create a new stream and construct a handle to it. When the handle /// is destroyed, the stream is destroyed. - __host__ + THRUST_HOST unique_marker() : handle_(nullptr, marker_deleter()) { @@ -77,29 +77,29 @@ public: handle_.reset(e); } - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_marker(unique_marker const&) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_marker(unique_marker&&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_marker& operator=(unique_marker const&) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_marker& operator=(unique_marker&&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE ~unique_marker() = default; - __host__ + THRUST_HOST auto get() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + THRUST_HOST auto native_handle() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + THRUST_HOST bool valid() const noexcept { return bool(handle_); } - __host__ + THRUST_HOST bool ready() const { hipError_t const err = hipEventQuery(handle_.get()); @@ -113,19 +113,19 @@ public: return true; } - __host__ + THRUST_HOST void wait() const { thrust::hip_rocprim::throw_on_error(hipEventSynchronize(handle_.get())); } - __host__ + THRUST_HOST bool operator==(unique_marker const& other) const { return other.handle_ == handle_; } - __host__ + THRUST_HOST bool operator!=(unique_marker const& other) const { return !(other == *this); @@ -136,7 +136,7 @@ public: struct stream_deleter final { - __host__ + THRUST_HOST void operator()(hipStream_t s) const { if (nullptr != s) @@ -150,15 +150,15 @@ 
private: bool cond_; public: - __host__ + THRUST_HOST constexpr stream_conditional_deleter() noexcept : cond_(true) {} - __host__ + THRUST_HOST explicit constexpr stream_conditional_deleter(nonowning_t) noexcept : cond_(false) {} - __host__ + THRUST_HOST void operator()(hipStream_t s) const { if (cond_ && nullptr != s) @@ -180,7 +180,7 @@ private: public: /// \brief Create a new stream and construct a handle to it. When the handle /// is destroyed, the stream is destroyed. - __host__ + THRUST_HOST unique_stream() : handle_(nullptr, stream_conditional_deleter()) { @@ -193,39 +193,39 @@ public: /// \brief Construct a non-owning handle to an existing stream. When the /// handle is destroyed, the stream is not destroyed. - __host__ + THRUST_HOST explicit unique_stream(nonowning_t, native_handle_type handle) : handle_(handle, stream_conditional_deleter(nonowning)) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_stream(unique_stream const&) = delete; // GCC 10 complains if this is defaulted. See NVIDIA/thrust#1269. 
- __thrust_exec_check_disable__ - __host__ unique_stream(unique_stream &&o) noexcept + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST unique_stream(unique_stream &&o) noexcept : handle_(std::move(o.handle_)) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_stream& operator=(unique_stream const&) = delete; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE unique_stream& operator=(unique_stream&&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE ~unique_stream() = default; - __host__ + THRUST_HOST auto get() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + THRUST_HOST auto native_handle() const THRUST_DECLTYPE_RETURNS(native_handle_type(handle_.get())); - __host__ + THRUST_HOST bool valid() const noexcept { return bool(handle_); } - __host__ + THRUST_HOST bool ready() const { hipError_t const err = hipStreamQuery(handle_.get()); @@ -239,7 +239,7 @@ public: return true; } - __host__ + THRUST_HOST void wait() const { thrust::hip_rocprim::throw_on_error( @@ -247,7 +247,7 @@ public: ); } - __host__ + THRUST_HOST void depend_on(unique_marker& e) { thrust::hip_rocprim::throw_on_error( @@ -255,7 +255,7 @@ public: ); } - __host__ + THRUST_HOST void depend_on(unique_stream& s) { if (s != *this) @@ -266,19 +266,19 @@ public: } } - __host__ + THRUST_HOST void record(unique_marker& e) { thrust::hip_rocprim::throw_on_error(hipEventRecord(e.get(), handle_.get())); } - __host__ + THRUST_HOST bool operator==(unique_stream const& other) const { return other.handle_ == handle_; } - __host__ + THRUST_HOST bool operator!=(unique_stream const& other) const { return !(other == *this); @@ -324,43 +324,43 @@ struct acquired_stream final // Precondition: `device` is the current HIP device. template -__host__ +THRUST_HOST optional try_acquire_stream(int device, std::unique_ptr&) noexcept; // Precondition: `device` is the current HIP device. 
-inline __host__ +inline THRUST_HOST optional try_acquire_stream(int, unique_stream& stream) noexcept; // Precondition: `device` is the current HIP device. -inline __host__ +inline THRUST_HOST optional try_acquire_stream(int device, ready_event&) noexcept; // Precondition: `device` is the current HIP device. template -inline __host__ +inline THRUST_HOST optional try_acquire_stream(int device, ready_future&) noexcept; // Precondition: `device` is the current HIP device. -inline __host__ +inline THRUST_HOST optional try_acquire_stream(int device, unique_eager_event& parent) noexcept; // Precondition: `device` is the current HIP device. template -__host__ +THRUST_HOST optional try_acquire_stream(int device, unique_eager_future& parent) noexcept; template -__host__ +THRUST_HOST acquired_stream acquire_stream(int device, Dependencies&... deps) noexcept; template -__host__ +THRUST_HOST unique_eager_event make_dependent_event( std::tuple&& deps @@ -370,7 +370,7 @@ template < typename X, typename XPointer , typename ComputeContent, typename... Dependencies > -__host__ +THRUST_HOST unique_eager_future_promise_pair make_dependent_future(ComputeContent&& cc, std::tuple&& deps); @@ -383,12 +383,12 @@ protected: public: // Constructs an `async_signal` which uses `stream`. - __host__ + THRUST_HOST explicit async_signal(unique_stream&& stream) : stream_(std::move(stream)) {} - __host__ + THRUST_HOST virtual ~async_signal() {} unique_stream& stream() noexcept { return stream_; } @@ -407,7 +407,7 @@ public: // Constructs an `async_keep_alives` which uses `stream`, and keeps the // objects in the tuple `keep_alives` alive until the asynchronous signal is // destroyed. 
- __host__ + THRUST_HOST explicit async_keep_alives( unique_stream&& stream, keep_alives_type&& keep_alives ) @@ -415,7 +415,7 @@ public: , keep_alives_(std::move(keep_alives)) {} - __host__ + THRUST_HOST virtual ~async_keep_alives() {} }; @@ -426,24 +426,24 @@ struct async_value : virtual async_signal using raw_const_pointer = value_type const*; // Constructs an `async_value` which uses `stream` and has no content. - __host__ + THRUST_HOST explicit async_value(unique_stream stream) : async_signal(std::move(stream)) {} - __host__ + THRUST_HOST virtual ~async_value() {} - __host__ + THRUST_HOST virtual bool valid_content() const noexcept { return false; } - __host__ + THRUST_HOST virtual value_type get() { throw thrust::event_error(event_errc::no_state); } - __host__ + THRUST_HOST virtual value_type extract() { throw thrust::event_error(event_errc::no_state); @@ -451,7 +451,7 @@ struct async_value : virtual async_signal // For testing only. #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) - __host__ + THRUST_HOST virtual raw_const_pointer raw_data() const { return nullptr; @@ -493,7 +493,7 @@ public: // in generated host code, which leads to -Wreorder warnings. THRUST_DISABLE_CLANG_AND_GCC_INITIALIZER_REORDERING_WARNING_BEGIN template - __host__ + THRUST_HOST explicit async_addressable_value_with_keep_alives( unique_stream&& stream , keep_alives_type&& keep_alives @@ -509,14 +509,14 @@ public: } THRUST_DISABLE_CLANG_AND_GCC_INITIALIZER_REORDERING_WARNING_END - __host__ + THRUST_HOST bool valid_content() const noexcept final override { return nullptr != content_; } // Precondition: `true == valid_content()`. - __host__ + THRUST_HOST pointer data() { if (!valid_content()) @@ -526,7 +526,7 @@ public: } // Precondition: `true == valid_content()`. - __host__ + THRUST_HOST const_pointer data() const { if (!valid_content()) @@ -537,7 +537,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. 
- __host__ + THRUST_HOST value_type get() final override { this->stream().wait(); @@ -546,7 +546,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - __host__ + THRUST_HOST value_type extract() final override { this->stream().wait(); @@ -555,7 +555,7 @@ public: // For testing only. #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) - __host__ + THRUST_HOST raw_const_pointer raw_data() const final override { return raw_pointer_cast(content_); @@ -586,20 +586,20 @@ private: {} public: - __host__ __device__ + THRUST_HOST_DEVICE weak_promise() : device_(0), content_{} {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE weak_promise(weak_promise const&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE weak_promise(weak_promise&&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE weak_promise& operator=(weak_promise const&) = default; - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE weak_promise& operator=(weak_promise&&) = default; template - __host__ __device__ + THRUST_HOST_DEVICE void set_value(U&& value) && { *content_ = THRUST_FWD(value); @@ -609,7 +609,7 @@ public: typename X, typename XPointer , typename ComputeContent, typename... 
Dependencies > - friend __host__ + friend THRUST_HOST unique_eager_future_promise_pair thrust::system::hip::detail::make_dependent_future( ComputeContent&& cc, std::tuple&& deps @@ -625,13 +625,13 @@ struct ready_event final ready_event() = default; template - __host__ __device__ + THRUST_HOST_DEVICE explicit ready_event(ready_future) {} - __host__ __device__ + THRUST_HOST_DEVICE static constexpr bool valid_content() noexcept { return true; } - __host__ __device__ + THRUST_HOST_DEVICE static constexpr bool ready() noexcept { return true; } }; @@ -645,7 +645,7 @@ private: value_type value_; public: - __host__ __device__ + THRUST_HOST_DEVICE ready_future() : value_{} {} ready_future(ready_future&&) = default; @@ -654,22 +654,22 @@ public: ready_future& operator=(ready_future const&) = default; template - __host__ __device__ + THRUST_HOST_DEVICE explicit ready_future(U&& u) : value_(THRUST_FWD(u)) {} - __host__ __device__ + THRUST_HOST_DEVICE static constexpr bool valid_content() noexcept { return true; } - __host__ __device__ + THRUST_HOST_DEVICE static constexpr bool ready() noexcept { return true; } - __host__ __device__ + THRUST_HOST_DEVICE value_type get() const { return value_; } - THRUST_NODISCARD __host__ __device__ + THRUST_NODISCARD THRUST_HOST_DEVICE value_type extract() { return std::move(value_); @@ -677,7 +677,7 @@ public: #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) // For testing only. - __host__ __device__ + THRUST_HOST_DEVICE raw_const_pointer data() const { return addressof(value_); @@ -691,7 +691,7 @@ protected: int device_ = 0; std::unique_ptr async_signal_; - __host__ + THRUST_HOST explicit unique_eager_event( int device, std::unique_ptr async_signal ) @@ -699,7 +699,7 @@ protected: {} public: - __host__ + THRUST_HOST unique_eager_event() : device_(0), async_signal_() {} @@ -712,13 +712,13 @@ public: // Any `unique_eager_future` can be explicitly converted to a // `unique_eager_event`. 
template - __host__ + THRUST_HOST explicit unique_eager_event(unique_eager_future&& other) // NOTE: We upcast to `unique_ptr` here. : device_(other.where()), async_signal_(std::move(other.async_signal_)) {} - __host__ + THRUST_HOST // NOTE: We take `new_stream_t` by `const&` because it is incomplete here. explicit unique_eager_event(new_stream_t const&) : device_(0) @@ -727,7 +727,7 @@ public: thrust::hip_rocprim::throw_on_error(hipGetDevice(&device_)); } - __host__ + THRUST_HOST virtual ~unique_eager_event() { // FIXME: If we could asynchronously handle destruction of keep alives, we @@ -735,13 +735,13 @@ public: if (valid_stream()) wait(); } - __host__ + THRUST_HOST bool valid_stream() const noexcept { return bool(async_signal_); } - __host__ + THRUST_HOST bool ready() const noexcept { if (valid_stream()) @@ -751,7 +751,7 @@ public: } // Precondition: `true == valid_stream()`. - __host__ + THRUST_HOST detail::unique_stream& stream() { if (!valid_stream()) @@ -767,24 +767,24 @@ public: return async_signal_->stream(); } - __host__ + THRUST_HOST int where() const noexcept { return device_; } // Precondition: `true == valid_stream()`. 
- __host__ + THRUST_HOST void wait() { stream().wait(); } - friend __host__ + friend THRUST_HOST optional thrust::system::hip::detail::try_acquire_stream( int device, unique_eager_event& parent ) noexcept; template - friend __host__ + friend THRUST_HOST unique_eager_event thrust::system::hip::detail::make_dependent_event( std::tuple&& deps @@ -806,7 +806,7 @@ private: int device_ = 0; std::unique_ptr> async_signal_; - __host__ + THRUST_HOST explicit unique_eager_future( int device_id, std::unique_ptr> async_signal ) @@ -814,7 +814,7 @@ private: {} public: - __host__ + THRUST_HOST unique_eager_future() : device_(0), async_signal_() {} @@ -824,7 +824,7 @@ public: unique_eager_future& operator=(unique_eager_future&&) = default; unique_eager_future& operator=(unique_eager_future const&) = delete; - __host__ + THRUST_HOST // NOTE: We take `new_stream_t` by `const&` because it is incomplete here. explicit unique_eager_future(new_stream_t const&) : device_(0) @@ -833,7 +833,7 @@ public: thrust::hip_rocprim::throw_on_error(hipGetDevice(&device_)); } - __host__ + THRUST_HOST ~unique_eager_future() { // FIXME: If we could asynchronously handle destruction of keep alives, we @@ -841,13 +841,13 @@ public: if (valid_stream()) wait(); } - __host__ + THRUST_HOST bool valid_stream() const noexcept { return bool(async_signal_); } - __host__ + THRUST_HOST bool valid_content() const noexcept { if (!valid_stream()) @@ -859,7 +859,7 @@ public: } // Precondition: `true == valid_stream()`. - __host__ + THRUST_HOST bool ready() const noexcept { if (valid_stream()) @@ -869,7 +869,7 @@ public: } // Precondition: `true == valid_stream()`. 
- __host__ + THRUST_HOST detail::unique_stream& stream() { if (!valid_stream()) @@ -877,7 +877,7 @@ public: return async_signal_->stream(); } - __host__ + THRUST_HOST detail::unique_stream const& stream() const { if (!valid_stream()) @@ -886,12 +886,12 @@ public: return async_signal_->stream(); } - __host__ + THRUST_HOST int where() const noexcept { return device_; } // Blocks. // Precondition: `true == valid_stream()`. - __host__ + THRUST_HOST void wait() { stream().wait(); @@ -899,7 +899,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - __host__ + THRUST_HOST value_type get() { if (!valid_content()) @@ -910,7 +910,7 @@ public: // Blocks. // Precondition: `true == valid_content()`. - THRUST_NODISCARD __host__ + THRUST_NODISCARD THRUST_HOST value_type extract() { if (!valid_content()) @@ -924,7 +924,7 @@ public: // For testing only. #if defined(THRUST_ENABLE_FUTURE_RAW_DATA_MEMBER) // Precondition: `true == valid_stream()`. - __host__ + THRUST_HOST raw_const_pointer raw_data() const { if (!valid_stream()) @@ -935,7 +935,7 @@ public: #endif template - friend __host__ + friend THRUST_HOST optional thrust::system::hip::detail::try_acquire_stream( int device_id, unique_eager_future& parent @@ -945,7 +945,7 @@ public: typename X, typename XPointer , typename ComputeContent, typename... 
Dependencies > - friend __host__ + friend THRUST_HOST detail::unique_eager_future_promise_pair thrust::system::hip::detail::make_dependent_future( ComputeContent&& cc, std::tuple&& deps @@ -959,7 +959,7 @@ public: namespace detail { template -__host__ +THRUST_HOST optional try_acquire_stream(int, std::unique_ptr&) noexcept { @@ -967,14 +967,14 @@ try_acquire_stream(int, std::unique_ptr&) noexcept return {}; } -inline __host__ +inline THRUST_HOST optional try_acquire_stream(int, unique_stream& stream) noexcept { return {std::move(stream)}; } -inline __host__ +inline THRUST_HOST optional try_acquire_stream(int, ready_event&) noexcept { @@ -983,7 +983,7 @@ try_acquire_stream(int, ready_event&) noexcept } template -__host__ +THRUST_HOST optional try_acquire_stream(int, ready_future&) noexcept { @@ -991,7 +991,7 @@ try_acquire_stream(int, ready_future&) noexcept return {}; } -__host__ +THRUST_HOST optional try_acquire_stream(int device_id, unique_eager_event& parent) noexcept { @@ -1005,7 +1005,7 @@ try_acquire_stream(int device_id, unique_eager_event& parent) noexcept } template -__host__ +THRUST_HOST optional try_acquire_stream(int device_id, unique_eager_future& parent) noexcept { @@ -1021,7 +1021,7 @@ try_acquire_stream(int device_id, unique_eager_future& parent) noexcept /////////////////////////////////////////////////////////////////////////////// template -__host__ +THRUST_HOST acquired_stream acquire_stream_impl( int, std::tuple&, index_sequence<> ) noexcept @@ -1032,7 +1032,7 @@ acquired_stream acquire_stream_impl( } template -__host__ +THRUST_HOST acquired_stream acquire_stream_impl( int device_id , std::tuple& deps, index_sequence @@ -1047,7 +1047,7 @@ acquired_stream acquire_stream_impl( } template -__host__ +THRUST_HOST acquired_stream acquire_stream( int device_id , std::tuple& deps @@ -1061,26 +1061,26 @@ acquired_stream acquire_stream( /////////////////////////////////////////////////////////////////////////////// template -__host__ +THRUST_HOST void 
create_dependency( unique_stream&, std::unique_ptr& ) noexcept {} -inline __host__ +inline THRUST_HOST void create_dependency( unique_stream&, ready_event& ) noexcept {} template -__host__ +THRUST_HOST void create_dependency( unique_stream&, ready_future& ) noexcept {} -inline __host__ +inline THRUST_HOST void create_dependency( unique_stream& child, unique_stream& parent ) @@ -1088,7 +1088,7 @@ void create_dependency( child.depend_on(parent); } -inline __host__ +inline THRUST_HOST void create_dependency( unique_stream& child, unique_eager_event& parent ) @@ -1097,7 +1097,7 @@ void create_dependency( } template -__host__ +THRUST_HOST void create_dependency( unique_stream& child, unique_eager_future& parent ) @@ -1106,7 +1106,7 @@ void create_dependency( } template -__host__ +THRUST_HOST void create_dependencies_impl( acquired_stream& , std::tuple&, index_sequence<> @@ -1114,7 +1114,7 @@ void create_dependencies_impl( {} template -__host__ +THRUST_HOST void create_dependencies_impl( acquired_stream& as , std::tuple& deps, index_sequence @@ -1131,7 +1131,7 @@ void create_dependencies_impl( } template -__host__ +THRUST_HOST void create_dependencies(acquired_stream& as, std::tuple& deps) { create_dependencies_impl( @@ -1264,7 +1264,7 @@ struct find_keep_alives_impl< /////////////////////////////////////////////////////////////////////////////// template -__host__ +THRUST_HOST unique_eager_event make_dependent_event(std::tuple&& deps) { int device_id = 0; @@ -1300,7 +1300,7 @@ template < typename X, typename XPointer , typename ComputeContent, typename... Dependencies > -__host__ +THRUST_HOST unique_eager_future_promise_pair make_dependent_future(ComputeContent&& cc, std::tuple&& deps) { @@ -1342,7 +1342,7 @@ make_dependent_future(ComputeContent&& cc, std::tuple&& deps) /////////////////////////////////////////////////////////////////////////////// template -__host__ +THRUST_HOST unique_eager_event when_all(Events&&... 
evs) // TODO: Constrain to events, futures, and maybe streams (currently allows keep // alives). @@ -1351,13 +1351,13 @@ unique_eager_event when_all(Events&&... evs) } // ADL hook for transparent `.after` move support. -inline __host__ +inline THRUST_HOST auto capture_as_dependency(unique_eager_event& dependency) THRUST_DECLTYPE_RETURNS(std::move(dependency)) // ADL hook for transparent `.after` move support. template -__host__ +THRUST_HOST auto capture_as_dependency(unique_eager_future& dependency) THRUST_DECLTYPE_RETURNS(std::move(dependency)) diff --git a/thrust/system/hip/detail/general/various.h b/thrust/system/hip/detail/general/various.h index 337acf4f0..c1e0dab77 100644 --- a/thrust/system/hip/detail/general/various.h +++ b/thrust/system/hip/detail/general/various.h @@ -32,18 +32,18 @@ template ::value && std::is_unsigned::value, int> = 0> -__host__ __device__ inline constexpr auto ceiling_div(const T a, const U b) +THRUST_HOST_DEVICE inline constexpr auto ceiling_div(const T a, const U b) { return a / b + (a % b > 0 ? 
1 : 0); } -__host__ __device__ inline size_t align_size(size_t size, size_t alignment = 256) +THRUST_HOST_DEVICE inline size_t align_size(size_t size, size_t alignment = 256) { return ceiling_div(size, alignment) * alignment; } template -__host__ __device__ inline void +THRUST_HOST_DEVICE inline void apply_to_each_in_tuple_impl(Tuple&& t, Function&& f, thrust::index_sequence) { int swallow[] @@ -52,7 +52,7 @@ apply_to_each_in_tuple_impl(Tuple&& t, Function&& f, thrust::index_sequence -__host__ __device__ inline auto apply_to_each_in_tuple(Tuple&& t, Function&& f) +THRUST_HOST_DEVICE inline auto apply_to_each_in_tuple(Tuple&& t, Function&& f) -> void_t>> { static constexpr size_t size = tuple_size>::value; diff --git a/thrust/system/hip/detail/guarded_driver_types.h b/thrust/system/hip/detail/guarded_driver_types.h index ad45b4d36..8203b53f3 100644 --- a/thrust/system/hip/detail/guarded_driver_types.h +++ b/thrust/system/hip/detail/guarded_driver_types.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,24 +24,24 @@ // carefully save their definitions and restore them -#ifdef __host__ -# pragma push_macro("__host__") -# undef __host__ +#ifdef THRUST_HOST +# pragma push_macro("THRUST_HOST") +# undef THRUST_HOST # define THRUST_HOST_NEEDS_RESTORATION #endif -#ifdef __device__ -# pragma push_macro("__device__") -# undef __device__ +#ifdef THRUST_DEVICE +# pragma push_macro("THRUST_DEVICE") +# undef THRUST_DEVICE # define THRUST_DEVICE_NEEDS_RESTORATION #endif #include #ifdef THRUST_HOST_NEEDS_RESTORATION -# pragma pop_macro("__host__") +# pragma pop_macro("THRUST_HOST") # undef THRUST_HOST_NEEDS_RESTORATION #endif #ifdef THRUST_DEVICE_NEEDS_RESTORATION -# pragma pop_macro("__device__") +# pragma pop_macro("THRUST_DEVICE") # undef THRUST_DEVICE_NEEDS_RESTORATION #endif diff --git a/thrust/system/hip/detail/guarded_hip_runtime_api.h b/thrust/system/hip/detail/guarded_hip_runtime_api.h index a69c16a08..9bdcb4cc5 100644 --- a/thrust/system/hip/detail/guarded_hip_runtime_api.h +++ b/thrust/system/hip/detail/guarded_hip_runtime_api.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,13 +25,13 @@ #if !defined(HIP_INCLUDE_HIP_AMD_DETAIL_HOST_DEFINES_H) -#ifdef __host__ -#undef __host__ -#endif // __host__ +#ifdef THRUST_HOST +#undef THRUST_HOST +#endif // THRUST_HOST -#ifdef __device__ -#undef __device__ -#endif // __device__ +#ifdef THRUST_DEVICE +#undef THRUST_DEVICE +#endif // THRUST_DEVICE #endif // __HOST_DEFINES_H__ diff --git a/thrust/system/hip/detail/internal/copy_cross_system.h b/thrust/system/hip/detail/internal/copy_cross_system.h index c524226dc..d0cb88b84 100644 --- a/thrust/system/hip/detail/internal/copy_cross_system.h +++ b/thrust/system/hip/detail/internal/copy_cross_system.h @@ -78,7 +78,7 @@ namespace __copy } template - OutputIt __host__ /* WORKAROUND */ __device__ + OutputIt THRUST_HOST /* WORKAROUND */ THRUST_DEVICE cross_system_copy_n(thrust::execution_policy& sys1, thrust::execution_policy& sys2, InputIt begin, @@ -151,7 +151,7 @@ namespace __copy // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(thrust::cpp::execution_policy& host_s, + THRUST_HOST static OutputIt par(thrust::cpp::execution_policy& host_s, thrust::hip_rocprim::execution_policy& device_s, InputIt first, Size num_items, @@ -160,7 +160,7 @@ namespace __copy return cross_system_copy_n_hd_nt(host_s, device_s, first, num_items, result); } - __device__ static OutputIt seq(thrust::cpp::execution_policy& host_s, + THRUST_DEVICE static OutputIt seq(thrust::cpp::execution_policy& host_s, thrust::hip_rocprim::execution_policy& device_s, InputIt first, Size num_items, @@ -230,7 +230,7 @@ cross_system_copy_n(thrust::hip_rocprim::execution_policy& device_s, // struct workaround is required for HIP-clang struct workaround { - __host__ static void par(thrust::hip_rocprim::execution_policy& device_s, + THRUST_HOST static void par(thrust::hip_rocprim::execution_policy& device_s, thrust::cpp::execution_policy& host_s, InputIt first, Size num_items, @@ -239,7 +239,7 @@ 
cross_system_copy_n(thrust::hip_rocprim::execution_policy& device_s, result = cross_system_copy_n_dh_nt(device_s, host_s, first, num_items, result); } - __device__ static void seq(thrust::hip_rocprim::execution_policy& device_s, + THRUST_DEVICE static void seq(thrust::hip_rocprim::execution_policy& device_s, thrust::cpp::execution_policy& host_s, InputIt first, Size num_items, diff --git a/thrust/system/hip/detail/malloc_and_free.h b/thrust/system/hip/detail/malloc_and_free.h index 5a902a97a..0d1b8f162 100644 --- a/thrust/system/hip/detail/malloc_and_free.h +++ b/thrust/system/hip/detail/malloc_and_free.h @@ -53,7 +53,7 @@ namespace hip_rocprim // note that malloc returns a raw pointer to avoid // depending on the heavyweight thrust/system/hip/memory.h header template -void* __host__ __device__ +void* THRUST_HOST_DEVICE malloc(execution_policy&, std::size_t n) { void* result = 0; @@ -90,7 +90,7 @@ malloc(execution_policy&, std::size_t n) } // end malloc() template -void __host__ __device__ +void THRUST_HOST_DEVICE free(execution_policy&, Pointer ptr) { // No caching allocator in rocPRIM diff --git a/thrust/system/hip/detail/merge.h b/thrust/system/hip/detail/merge.h index ab770d479..b74829923 100644 --- a/thrust/system/hip/detail/merge.h +++ b/thrust/system/hip/detail/merge.h @@ -225,7 +225,7 @@ namespace __merge //------------------------- // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static ResultIt par(execution_policy& policy, + THRUST_HOST static ResultIt par(execution_policy& policy, KeysIt1 keys1_first, KeysIt1 keys1_last, KeysIt2 keys2_first, @@ -254,7 +254,7 @@ merge(execution_policy& policy, return __merge::merge( policy, keys1_first, keys1_last, keys2_first, keys2_last, result, compare_op); } - __device__ static ResultIt seq(execution_policy& policy, + THRUST_DEVICE static ResultIt seq(execution_policy& policy, KeysIt1 
keys1_first, KeysIt1 keys1_last, KeysIt2 keys2_first, @@ -278,7 +278,7 @@ merge(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static pair par(execution_policy& policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -324,7 +324,7 @@ merge_by_key(execution_policy& policy, items_result, compare_op); } - __device__ + THRUST_DEVICE static pair seq(execution_policy& policy, KeysIt1 keys1_first, KeysIt1 keys1_last, @@ -359,7 +359,7 @@ merge_by_key(execution_policy& policy, } -__thrust_exec_check_disable__ template diff --git a/thrust/system/hip/detail/par.h b/thrust/system/hip/detail/par.h index 3b8bf5391..d8c98cfcd 100644 --- a/thrust/system/hip/detail/par.h +++ b/thrust/system/hip/detail/par.h @@ -45,8 +45,8 @@ namespace hip_rocprim hipStream_t stream; public: - __thrust_exec_check_disable__ - __host__ __device__ execute_on_stream_base(hipStream_t stream_ = default_stream()) + THRUST_EXEC_CHECK_DISABLE + THRUST_HOST_DEVICE execute_on_stream_base(hipStream_t stream_ = default_stream()) : stream(stream_) { } @@ -61,7 +61,7 @@ namespace hip_rocprim } private: - friend hipStream_t __host__ __device__ + friend hipStream_t THRUST_HOST_DEVICE get_stream(const execute_on_stream_base& exec) { return exec.stream; @@ -72,9 +72,9 @@ namespace hip_rocprim { typedef execute_on_stream_base base_t; - __host__ __device__ execute_on_stream() + THRUST_HOST_DEVICE execute_on_stream() : base_t() {}; - __host__ __device__ execute_on_stream(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream(hipStream_t stream) : base_t(stream) {}; }; @@ -85,13 +85,13 @@ namespace hip_rocprim typedef execute_on_stream_base base_t; public: - __host__ __device__ execute_on_stream_nosync_base() + THRUST_HOST_DEVICE execute_on_stream_nosync_base() : base_t() {}; - __host__ __device__ execute_on_stream_nosync_base(hipStream_t stream) + THRUST_HOST_DEVICE 
execute_on_stream_nosync_base(hipStream_t stream) : base_t(stream) {}; private: - friend __host__ __device__ + friend THRUST_HOST_DEVICE bool must_perform_optional_stream_synchronization(const execute_on_stream_nosync_base &) { @@ -103,9 +103,9 @@ namespace hip_rocprim { typedef execute_on_stream_nosync_base base_t; - __host__ __device__ + THRUST_HOST_DEVICE execute_on_stream_nosync() : base_t(){}; - __host__ __device__ execute_on_stream_nosync(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream_nosync(hipStream_t stream) : base_t(stream) {}; }; @@ -115,7 +115,7 @@ namespace hip_rocprim { typedef execution_policy base_t; - __device__ __host__ + THRUST_DEVICE THRUST_HOST constexpr par_t() : base_t() { } @@ -137,7 +137,7 @@ namespace hip_rocprim { typedef execution_policy base_t; - __host__ __device__ + THRUST_HOST_DEVICE constexpr par_nosync_t() : base_t() {} typedef execute_on_stream_nosync stream_attachment_type; @@ -152,7 +152,7 @@ namespace hip_rocprim private: //this function is defined to allow non-blocking calls on the default_stream() with thrust::cuda::par_nosync //without explicitly using thrust::cuda::par_nosync.on(default_stream()) - friend __host__ __device__ bool + friend THRUST_HOST_DEVICE bool must_perform_optional_stream_synchronization(const par_nosync_t &) { return false; @@ -166,13 +166,13 @@ namespace hip_rocprim typedef execute_on_stream_base base_t; public: - __host__ __device__ execute_on_stream_deterministic_base() + THRUST_HOST_DEVICE execute_on_stream_deterministic_base() : base_t() {}; - __host__ __device__ execute_on_stream_deterministic_base(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream_deterministic_base(hipStream_t stream) : base_t(stream) {}; private: - friend __host__ __device__ integral_constant + friend THRUST_HOST_DEVICE integral_constant allows_nondeterminism(const execute_on_stream_deterministic_base&) { return {}; @@ -184,9 +184,9 @@ namespace hip_rocprim { typedef execute_on_stream_deterministic_base 
base_t; - __host__ __device__ execute_on_stream_deterministic() + THRUST_HOST_DEVICE execute_on_stream_deterministic() : base_t() {}; - __host__ __device__ execute_on_stream_deterministic(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream_deterministic(hipStream_t stream) : base_t(stream) {}; }; @@ -197,7 +197,7 @@ namespace hip_rocprim { typedef execution_policy base_t; - __host__ __device__ constexpr par_det_t() + THRUST_HOST_DEVICE constexpr par_det_t() : base_t() { } @@ -211,7 +211,7 @@ namespace hip_rocprim } private: - friend __host__ __device__ integral_constant + friend THRUST_HOST_DEVICE integral_constant allows_nondeterminism(const par_det_t&) { return {}; @@ -225,13 +225,13 @@ namespace hip_rocprim typedef execute_on_stream_nosync_base base_t; public: - __host__ __device__ execute_on_stream_nosync_deterministic_base() + THRUST_HOST_DEVICE execute_on_stream_nosync_deterministic_base() : base_t() {}; - __host__ __device__ execute_on_stream_nosync_deterministic_base(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream_nosync_deterministic_base(hipStream_t stream) : base_t(stream) {}; private: - friend __host__ __device__ integral_constant + friend THRUST_HOST_DEVICE integral_constant allows_nondeterminism(const execute_on_stream_nosync_deterministic_base&) { return {}; @@ -244,9 +244,9 @@ namespace hip_rocprim typedef execute_on_stream_nosync_deterministic_base base_t; - __host__ __device__ execute_on_stream_nosync_deterministic() + THRUST_HOST_DEVICE execute_on_stream_nosync_deterministic() : base_t() {}; - __host__ __device__ execute_on_stream_nosync_deterministic(hipStream_t stream) + THRUST_HOST_DEVICE execute_on_stream_nosync_deterministic(hipStream_t stream) : base_t(stream) {}; }; @@ -258,7 +258,7 @@ namespace hip_rocprim { typedef execution_policy base_t; - __host__ __device__ constexpr par_det_nosync_t() + THRUST_HOST_DEVICE constexpr par_det_nosync_t() : base_t() { } @@ -272,7 +272,7 @@ namespace hip_rocprim } private: - friend 
__host__ __device__ integral_constant + friend THRUST_HOST_DEVICE integral_constant allows_nondeterminism(const par_det_nosync_t&) { return {}; diff --git a/thrust/system/hip/detail/parallel_for.h b/thrust/system/hip/detail/parallel_for.h index fcfb1deee..da0d7654a 100644 --- a/thrust/system/hip/detail/parallel_for.h +++ b/thrust/system/hip/detail/parallel_for.h @@ -125,7 +125,7 @@ namespace __parallel_for } } // __parallel_for -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template void THRUST_HIP_FUNCTION parallel_for(execution_policy& policy, F f, Size count) { @@ -137,7 +137,7 @@ parallel_for(execution_policy& policy, F f, Size count) // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static void par(execution_policy& policy, F f, Size count) { hipStream_t stream = hip_rocprim::stream(policy); @@ -147,7 +147,7 @@ parallel_for(execution_policy& policy, F f, Size count) "parallel_for: failed to synchronize"); } - __device__ + THRUST_DEVICE static void seq(execution_policy& policy, F f, Size count) { (void)policy; diff --git a/thrust/system/hip/detail/partition.h b/thrust/system/hip/detail/partition.h index 6669aaf2e..952c19364 100644 --- a/thrust/system/hip/detail/partition.h +++ b/thrust/system/hip/detail/partition.h @@ -407,7 +407,7 @@ namespace __partition // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static pair par(execution_policy& policy, + THRUST_HOST static pair par(execution_policy& policy, InputIt first, InputIt last, StencilIt stencil, @@ -437,7 +437,7 @@ partition_copy(execution_policy& policy, policy, first, last, stencil, selected_result, rejected_result, predicate); } - __device__ static pair seq(execution_policy& policy, + THRUST_DEVICE static pair seq(execution_policy& policy, InputIt first, InputIt last, StencilIt stencil, @@ -461,7 +461,7 @@ 
partition_copy(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static pair par( execution_policy& policy, InputIt first, @@ -490,7 +490,7 @@ partition_copy(execution_policy& policy, policy, first, last, selected_result, rejected_result, predicate); } - __device__ + THRUST_DEVICE static pair seq( execution_policy& policy, InputIt first, @@ -514,7 +514,7 @@ partition_copy(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static pair par(execution_policy& policy, + THRUST_HOST static pair par(execution_policy& policy, InputIt first, InputIt last, SelectedOutIt selected_result, @@ -541,7 +541,7 @@ stable_partition_copy(execution_policy& policy, policy, first, last, selected_result, rejected_result, predicate); } - __device__ static pair seq(execution_policy& policy, + THRUST_DEVICE static pair seq(execution_policy& policy, InputIt first, InputIt last, SelectedOutIt selected_result, @@ -563,7 +563,7 @@ stable_partition_copy(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static pair par( execution_policy& policy, InputIt first, @@ -594,7 +594,7 @@ stable_partition_copy(execution_policy& policy, return __partition::partition_copy( policy, first, last, stencil, selected_result, rejected_result, predicate); } - __device__ + THRUST_DEVICE static pair seq( execution_policy& policy, InputIt first, @@ -623,7 +623,7 @@ stable_partition_copy(execution_policy& policy, } /// inplace -__thrust_exec_check_disable__ template @@ -637,7 +637,7 @@ partition(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static Iterator par(execution_policy& policy, 
Iterator first, Iterator last, @@ -654,7 +654,7 @@ partition(execution_policy& policy, hip_rocprim::reverse(policy, result, last); return result; } - __device__ + THRUST_DEVICE static Iterator seq(execution_policy& policy, Iterator first, Iterator last, @@ -677,7 +677,7 @@ partition(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template Iterator THRUST_HIP_FUNCTION partition(execution_policy& policy, Iterator first, @@ -687,7 +687,7 @@ partition(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static Iterator par(execution_policy& policy, Iterator first, Iterator last, @@ -695,7 +695,7 @@ partition(execution_policy& policy, { return __partition::partition_inplace(policy, first, last, predicate); } - __device__ + THRUST_DEVICE static Iterator seq(execution_policy& policy, Iterator first, Iterator last, @@ -717,7 +717,7 @@ partition(execution_policy& policy, } -__thrust_exec_check_disable__ template @@ -731,7 +731,7 @@ stable_partition(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static Iterator par(execution_policy& policy, Iterator first, Iterator last, @@ -743,7 +743,7 @@ stable_partition(execution_policy& policy, return result; } - __device__ + THRUST_DEVICE static Iterator seq(execution_policy& policy, Iterator first, Iterator last, @@ -766,7 +766,7 @@ stable_partition(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template Iterator THRUST_HIP_FUNCTION stable_partition(execution_policy& policy, Iterator first, @@ -776,7 +776,7 @@ stable_partition(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static Iterator par(execution_policy& policy, Iterator first, Iterator last, @@ -787,7 +787,7 @@ stable_partition(execution_policy& policy, return result; 
} - __device__ + THRUST_DEVICE static Iterator seq(execution_policy& policy, Iterator first, Iterator last, diff --git a/thrust/system/hip/detail/per_device_resource.h b/thrust/system/hip/detail/per_device_resource.h index 1b1ba2880..2b759f847 100644 --- a/thrust/system/hip/detail/per_device_resource.h +++ b/thrust/system/hip/detail/per_device_resource.h @@ -1,6 +1,6 @@ /****************************************************************************** * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. - * Modifications Copyright (c) 2020-2023, Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright (c) 2020-2024, Advanced Micro Devices, Inc. All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright @@ -44,7 +44,7 @@ namespace hip_rocprim { template -__host__ +THRUST_HOST MR * get_per_device_resource(execution_policy&) { static std::mutex map_lock; diff --git a/thrust/system/hip/detail/reduce.h b/thrust/system/hip/detail/reduce.h index 1e988af7b..688a3014e 100644 --- a/thrust/system/hip/detail/reduce.h +++ b/thrust/system/hip/detail/reduce.h @@ -57,7 +57,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE T reduce(const thrust::detail::execution_policy_base& exec, InputIterator first, InputIterator last, @@ -146,7 +146,7 @@ T reduce_n(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static T par(execution_policy& policy, + THRUST_HOST static T par(execution_policy& policy, InputIt first, Size num_items, T init, @@ -154,7 +154,7 @@ T reduce_n(execution_policy& policy, { return __reduce::reduce(policy, first, num_items, init, binary_op); } - __device__ static T seq(execution_policy& policy, + THRUST_DEVICE static T seq(execution_policy& policy, InputIt first, Size num_items, T init, diff --git 
a/thrust/system/hip/detail/reduce_by_key.h b/thrust/system/hip/detail/reduce_by_key.h index 2aedb2936..7538336ef 100644 --- a/thrust/system/hip/detail/reduce_by_key.h +++ b/thrust/system/hip/detail/reduce_by_key.h @@ -58,7 +58,7 @@ template -__host__ __device__ thrust::pair +THRUST_HOST_DEVICE thrust::pair reduce_by_key(const thrust::detail::execution_policy_base& exec, InputIterator1 keys_first, InputIterator1 keys_last, @@ -233,7 +233,7 @@ namespace __reduce_by_key // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static pair par(execution_policy& policy, + THRUST_HOST static pair par(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, ValInputIt values_first, @@ -272,7 +272,7 @@ reduce_by_key(execution_policy& policy, binary_pred, binary_op); } - __device__ static pair seq(execution_policy& policy, + THRUST_DEVICE static pair seq(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, ValInputIt values_first, @@ -299,7 +299,7 @@ reduce_by_key(execution_policy& policy, } -__thrust_exec_check_disable__ template & policy, plus()); } -__thrust_exec_check_disable__ template - ResultIt __host__ __device__ + ResultIt THRUST_HOST_DEVICE reverse_copy(execution_policy& policy, ItemsIt first, ItemsIt last, ResultIt result); template - void __host__ __device__ + void THRUST_HOST_DEVICE reverse(execution_policy& policy, ItemsIt first, ItemsIt last); diff --git a/thrust/system/hip/detail/scan.h b/thrust/system/hip/detail/scan.h index e57c8d89f..8e72c95c8 100644 --- a/thrust/system/hip/detail/scan.h +++ b/thrust/system/hip/detail/scan.h @@ -292,7 +292,7 @@ inclusive_scan_n(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, InputIt input_it, Size num_items, 
OutputIt result, @@ -300,7 +300,7 @@ inclusive_scan_n(execution_policy& policy, { return __scan::inclusive_scan(policy, input_it, result, num_items, scan_op); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, InputIt input_it, Size num_items, OutputIt result, @@ -348,7 +348,7 @@ inclusive_scan(execution_policy& policy, thrust::plus<>{}); } -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, InputIt first, Size num_items, OutputIt result, @@ -374,7 +374,7 @@ OutputIt THRUST_HIP_FUNCTION exclusive_scan_n(execution_policy& policy, return __scan::exclusive_scan(policy, first, result, num_items, init, scan_op); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, InputIt first, Size num_items, OutputIt result, diff --git a/thrust/system/hip/detail/scan_by_key.h b/thrust/system/hip/detail/scan_by_key.h index 365213784..dfb72e4b6 100644 --- a/thrust/system/hip/detail/scan_by_key.h +++ b/thrust/system/hip/detail/scan_by_key.h @@ -322,7 +322,7 @@ namespace __scan_by_key // Inclusive scan //--------------------------- -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static ValOutputIt par(execution_policy& policy, + THRUST_HOST static ValOutputIt par(execution_policy& policy, KeyInputIt key_first, KeyInputIt key_last, ValInputIt value_first, @@ -352,7 +352,7 @@ inclusive_scan_by_key(execution_policy& policy, policy, key_first, key_last, value_first, value_result, binary_pred, scan_op); } - __device__ static ValOutputIt seq(execution_policy& policy, + THRUST_DEVICE static ValOutputIt seq(execution_policy& policy, KeyInputIt key_first, KeyInputIt key_last, ValInputIt value_first, 
@@ -422,7 +422,7 @@ inclusive_scan_by_key(execution_policy& policy, // Exclusive scan //--------------------------- -__thrust_exec_check_disable__ template & policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static ValOutputIt par(execution_policy& policy, + THRUST_HOST static ValOutputIt par(execution_policy& policy, KeyInputIt key_first, KeyInputIt key_last, ValInputIt value_first, @@ -456,7 +456,7 @@ exclusive_scan_by_key(execution_policy& policy, policy, key_first, key_last, value_first, value_result, init, binary_pred, scan_op); } - __device__ static ValOutputIt seq(execution_policy& policy, + THRUST_DEVICE static ValOutputIt seq(execution_policy& policy, KeyInputIt key_first, KeyInputIt key_last, ValInputIt value_first, diff --git a/thrust/system/hip/detail/set_operations.h b/thrust/system/hip/detail/set_operations.h index 7f9ffb02c..ea3017ad8 100644 --- a/thrust/system/hip/detail/set_operations.h +++ b/thrust/system/hip/detail/set_operations.h @@ -981,7 +981,7 @@ namespace __set_operations status = __parallel_for::parallel_for( number_of_blocks + 1, - [=] __device__(Size idx) mutable { + [=] THRUST_DEVICE(Size idx) mutable { Size partition_at = min(idx * items_per_block, num_keys1 + num_keys2); partitions[idx] = balanced_path( keys1, keys2, num_keys1, num_keys2, partition_at, 4ll, compare_op); diff --git a/thrust/system/hip/detail/sort.h b/thrust/system/hip/detail/sort.h index 1637035b6..a04120359 100644 --- a/thrust/system/hip/detail/sort.h +++ b/thrust/system/hip/detail/sort.h @@ -389,7 +389,7 @@ namespace __smart_sort // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template void THRUST_HIP_FUNCTION stable_sort(execution_policy& policy, ItemsIt first, @@ -399,7 +399,7 @@ stable_sort(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static void par(execution_policy& policy, ItemsIt 
first, ItemsIt last, @@ -409,7 +409,7 @@ stable_sort(execution_policy& policy, __smart_sort::smart_sort( policy, first, last, (item_type*)NULL, compare_op); } - __device__ + THRUST_DEVICE static void seq(execution_policy& policy, ItemsIt first, ItemsIt last, @@ -425,7 +425,7 @@ stable_sort(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template void THRUST_HIP_FUNCTION sort(execution_policy& policy, ItemsIt first, @@ -435,7 +435,7 @@ sort(execution_policy& policy, hip_rocprim::stable_sort(policy, first, last, compare_op); } -__thrust_exec_check_disable__ template @@ -449,7 +449,7 @@ stable_sort_by_key(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static void par(execution_policy& policy, KeysIt keys_first, KeysIt keys_last, @@ -460,7 +460,7 @@ stable_sort_by_key(execution_policy& policy, policy, keys_first, keys_last, values, compare_op); } - __device__ + THRUST_DEVICE static void seq(execution_policy& policy, KeysIt keys_first, KeysIt keys_last, @@ -479,7 +479,7 @@ stable_sort_by_key(execution_policy& policy, #endif } -__thrust_exec_check_disable__ template diff --git a/thrust/system/hip/detail/tabulate.h b/thrust/system/hip/detail/tabulate.h index 463cee560..acfe0ed76 100644 --- a/thrust/system/hip/detail/tabulate.h +++ b/thrust/system/hip/detail/tabulate.h @@ -48,11 +48,11 @@ namespace __tabulate { Iterator items; TabulateOp op; - __host__ __device__ + THRUST_HOST_DEVICE functor(Iterator items_, TabulateOp op_) : items(items_), op(op_) {} - void __device__ operator()(Size idx) + void THRUST_DEVICE operator()(Size idx) { items[idx] = op(idx); } diff --git a/thrust/system/hip/detail/unique.h b/thrust/system/hip/detail/unique.h index 8f80641eb..4c86097c6 100644 --- a/thrust/system/hip/detail/unique.h +++ b/thrust/system/hip/detail/unique.h @@ -50,7 +50,7 @@ THRUST_NAMESPACE_BEGIN template -ForwardIterator __host__ __device__ 
+ForwardIterator THRUST_HOST_DEVICE unique(const thrust::detail::execution_policy_base& exec, ForwardIterator first, ForwardIterator last, @@ -60,7 +60,7 @@ template -__host__ __device__ OutputIterator +THRUST_HOST_DEVICE OutputIterator unique_copy(const thrust::detail::execution_policy_base& exec, InputIterator first, InputIterator last, @@ -144,7 +144,7 @@ namespace __unique // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template @@ -158,7 +158,7 @@ unique_copy(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static OutputIt par(execution_policy& policy, + THRUST_HOST static OutputIt par(execution_policy& policy, InputIt first, InputIt last, OutputIt result, @@ -166,7 +166,7 @@ unique_copy(execution_policy& policy, { return __unique::unique(policy, first, last, result, binary_pred); } - __device__ static OutputIt seq(execution_policy& policy, + THRUST_DEVICE static OutputIt seq(execution_policy& policy, InputIt first, InputIt last, OutputIt result, @@ -191,7 +191,7 @@ unique_copy(execution_policy& policy, InputIt first, InputIt last, Outp return hip_rocprim::unique_copy(policy, first, last, result, equal_to()); } -__thrust_exec_check_disable__ template +THRUST_EXEC_CHECK_DISABLE template InputIt THRUST_HIP_FUNCTION unique(execution_policy& policy, InputIt first, @@ -201,12 +201,12 @@ unique(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static InputIt + THRUST_HOST static InputIt par(execution_policy& policy, InputIt first, InputIt last, BinaryPred binary_pred) { return hip_rocprim::unique_copy(policy, first, last, first, binary_pred); } - __device__ static InputIt + THRUST_DEVICE static InputIt seq(execution_policy& policy, InputIt first, InputIt last, BinaryPred binary_pred) { return thrust::unique(cvt_to_seq(derived_cast(policy)), first, last, binary_pred); @@ -231,14 +231,14 @@ InputIt THRUST_HIP_FUNCTION 
unique(execution_policy& policy, template struct zip_adj_not_predicate { template - bool __host__ __device__ operator()(TupleType&& tuple) { + bool THRUST_HOST_DEVICE operator()(TupleType&& tuple) { return !binary_pred(thrust::get<0>(tuple), thrust::get<1>(tuple)); } BinaryPred binary_pred; }; -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template diff --git a/thrust/system/hip/detail/unique_by_key.h b/thrust/system/hip/detail/unique_by_key.h index 47c5b8d7a..9cbac2910 100644 --- a/thrust/system/hip/detail/unique_by_key.h +++ b/thrust/system/hip/detail/unique_by_key.h @@ -52,7 +52,7 @@ THRUST_NAMESPACE_BEGIN template -thrust::pair __host__ __device__ +thrust::pair THRUST_HOST_DEVICE unique_by_key(const thrust::detail::execution_policy_base& exec, ForwardIterator1 keys_first, ForwardIterator1 keys_last, @@ -62,7 +62,7 @@ template - __host__ __device__ thrust::pair + THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(const thrust::detail::execution_policy_base& exec, InputIterator1 keys_first, InputIterator1 keys_last, @@ -157,7 +157,7 @@ namespace __unique_by_key // Thrust API entry points //------------------------- -__thrust_exec_check_disable__ template // struct workaround is required for HIP-clang struct workaround { - __host__ + THRUST_HOST static pair par(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -192,7 +192,7 @@ pair values_result, binary_pred); } - __device__ + THRUST_DEVICE static pair seq(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, @@ -253,7 +253,7 @@ unique_by_key(execution_policy& policy, // struct workaround is required for HIP-clang struct workaround { - __host__ static pair par(execution_policy& policy, + THRUST_HOST static pair par(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, ValInputIt values_first, @@ -262,7 +262,7 @@ unique_by_key(execution_policy& policy, return hip_rocprim::unique_by_key_copy( policy, keys_first, keys_last, values_first, keys_first, 
values_first, binary_pred); } - __device__ static pair seq(execution_policy& policy, + THRUST_DEVICE static pair seq(execution_policy& policy, KeyInputIt keys_first, KeyInputIt keys_last, ValInputIt values_first, diff --git a/thrust/system/hip/detail/util.h b/thrust/system/hip/detail/util.h index 5c30b05da..7813273e5 100644 --- a/thrust/system/hip/detail/util.h +++ b/thrust/system/hip/detail/util.h @@ -54,7 +54,7 @@ THRUST_NAMESPACE_BEGIN namespace hip_rocprim { -inline __host__ __device__ hipStream_t default_stream() +inline THRUST_HOST_DEVICE hipStream_t default_stream() { #ifdef HIP_API_PER_THREAD_DEFAULT_STREAM return hipStreamPerThread; @@ -64,41 +64,41 @@ inline __host__ __device__ hipStream_t default_stream() } template -hipStream_t __host__ __device__ +hipStream_t THRUST_HOST_DEVICE get_stream(execution_policy&) { return default_stream(); } // Fallback implementation of the customization point. -template __host__ __device__ +template THRUST_HOST_DEVICE bool must_perform_optional_stream_synchronization(execution_policy &) { return true; } // Entry point/interface. 
-template __host__ __device__ +template THRUST_HOST_DEVICE bool must_perform_optional_synchronization(execution_policy &policy) { return must_perform_optional_stream_synchronization(derived_cast(policy)); } template -__host__ __device__ integral_constant allows_nondeterminism(execution_policy&) +THRUST_HOST_DEVICE integral_constant allows_nondeterminism(execution_policy&) { return {}; } template -__host__ __device__ auto nondeterministic(execution_policy& policy) +THRUST_HOST_DEVICE auto nondeterministic(execution_policy& policy) -> decltype(allows_nondeterminism(derived_cast(policy))) { return {}; } template -__host__ __device__ hipError_t synchronize_stream(execution_policy& policy) +THRUST_HOST_DEVICE hipError_t synchronize_stream(execution_policy& policy) { hipError_t result; // Can't use #if inside NV_IF_TARGET, use a temp macro to hoist the device @@ -123,7 +123,7 @@ __host__ __device__ hipError_t synchronize_stream(execution_policy& pol } // Fallback implementation of the customization point. -template __host__ __device__ +template THRUST_HOST_DEVICE hipError_t synchronize_stream_optional(execution_policy &policy) { hipError_t result; @@ -141,14 +141,14 @@ hipError_t synchronize_stream_optional(execution_policy &policy) } // Entry point/interface. 
-template __host__ __device__ +template THRUST_HOST_DEVICE hipError_t synchronize_optional(Policy &policy) { return synchronize_stream_optional(derived_cast(policy)); } -__thrust_exec_check_disable__ template -__host__ __device__ hipError_t synchronize(Policy& policy) +THRUST_EXEC_CHECK_DISABLE template +THRUST_HOST_DEVICE hipError_t synchronize(Policy& policy) { #if __THRUST_HAS_HIPRT__ return synchronize_stream(derived_cast(policy)); @@ -159,7 +159,7 @@ __host__ __device__ hipError_t synchronize(Policy& policy) } template -__host__ __device__ hipStream_t stream(execution_policy& policy) +THRUST_HOST_DEVICE hipStream_t stream(execution_policy& policy) { return get_stream(derived_cast(policy)); } @@ -209,7 +209,7 @@ trivial_copy_to_device(Type* dst, Type const* src, size_t count, hipStream_t str } template -__host__ __device__ hipError_t +THRUST_HOST_DEVICE hipError_t trivial_copy_device_to_device(Policy& policy, Type* dst, Type const* src, size_t count) { hipError_t status = hipSuccess; @@ -225,12 +225,12 @@ trivial_copy_device_to_device(Policy& policy, Type* dst, Type const* src, size_t return status; } -inline void __host__ __device__ terminate() +inline void THRUST_HOST_DEVICE terminate() { NV_IF_TARGET(NV_IS_HOST, (std::terminate();), (abort();)); } -inline void __host__ __device__ throw_on_error(hipError_t status, char const* msg) +inline void THRUST_HOST_DEVICE throw_on_error(hipError_t status, char const* msg) { // Clear the global HIP error state which may have been set by the last // call. Otherwise, errors may "leak" to unrelated kernel launches. @@ -259,7 +259,7 @@ inline void __host__ __device__ throw_on_error(hipError_t status, char const* ms } // TODO this overload should be removed and messages should be passed. -inline void __host__ __device__ throw_on_error(hipError_t status) +inline void THRUST_HOST_DEVICE throw_on_error(hipError_t status) { // Clear the global HIP error state which may have been set by the last // call. 
Otherwise, errors may "leak" to unrelated kernel launches. @@ -514,13 +514,13 @@ struct transform_pair_of_input_iterators_t struct identity { template - __host__ __device__ T const& operator()(T const& t) const + THRUST_HOST_DEVICE T const& operator()(T const& t) const { return t; } template - __host__ __device__ T& operator()(T& t) const + THRUST_HOST_DEVICE T& operator()(T& t) const { return t; } diff --git a/thrust/system/hip/future.h b/thrust/system/hip/future.h index 8ee9aa2f2..227df30d8 100644 --- a/thrust/system/hip/future.h +++ b/thrust/system/hip/future.h @@ -26,7 +26,7 @@ template struct unique_eager_future; template -__host__ +THRUST_HOST unique_eager_event when_all(Events&&... evs); }} // namespace system::hip @@ -49,14 +49,14 @@ using thrust::system::hip::when_all; } // namespace hip template -__host__ +THRUST_HOST thrust::hip::unique_eager_event unique_eager_event_type( thrust::hip::execution_policy const& ) noexcept; template -__host__ +THRUST_HOST thrust::hip::unique_eager_future unique_eager_future_type( thrust::hip::execution_policy const& diff --git a/thrust/system/hip/memory.h b/thrust/system/hip/memory.h index a6ad70640..3e13ba626 100644 --- a/thrust/system/hip/memory.h +++ b/thrust/system/hip/memory.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2018 NVIDIA Corporation - * Modifications Copyright© 2019-2021 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ namespace hip_rocprim * \see hip::free * \see std::malloc */ -inline __host__ __device__ pointer malloc(std::size_t n); +inline THRUST_HOST_DEVICE pointer malloc(std::size_t n); /*! Allocates a typed area of memory available to Thrust's hip system. * \param n Number of elements to allocate. 
@@ -55,7 +55,7 @@ inline __host__ __device__ pointer malloc(std::size_t n); * \see std::malloc */ template -inline __host__ __device__ pointer malloc(std::size_t n); +inline THRUST_HOST_DEVICE pointer malloc(std::size_t n); /*! Deallocates an area of memory previously allocated by hip::malloc. * \param ptr A hip::pointer pointing to the beginning of an area @@ -63,7 +63,7 @@ inline __host__ __device__ pointer malloc(std::size_t n); * \see hip::malloc * \see std::free */ -inline __host__ __device__ void free(pointer ptr); +inline THRUST_HOST_DEVICE void free(pointer ptr); /*! \p hip::allocator is the default allocator used by the \p hip system's * containers such as hip::vector if no user-specified allocator is diff --git a/thrust/system/omp/detail/execution_policy.h b/thrust/system/omp/detail/execution_policy.h index f9b45312b..2225053ab 100644 --- a/thrust/system/omp/detail/execution_policy.h +++ b/thrust/system/omp/detail/execution_policy.h @@ -70,7 +70,7 @@ template // arbitrarily define in the omp backend template -inline __host__ __device__ +inline THRUST_HOST_DEVICE System1 select_system(execution_policy s, thrust::system::tbb::detail::execution_policy) { return thrust::detail::derived_cast(s); @@ -78,7 +78,7 @@ inline __host__ __device__ template -inline __host__ __device__ +inline THRUST_HOST_DEVICE System2 select_system(thrust::system::tbb::detail::execution_policy, execution_policy s) { return thrust::detail::derived_cast(s); diff --git a/thrust/system/omp/detail/par.h b/thrust/system/omp/detail/par.h index 406817fff..8a092621b 100644 --- a/thrust/system/omp/detail/par.h +++ b/thrust/system/omp/detail/par.h @@ -33,7 +33,7 @@ struct par_t : thrust::system::omp::detail::execution_policy, thrust::detail::allocator_aware_execution_policy< thrust::system::omp::detail::execution_policy> { - __host__ __device__ + THRUST_HOST_DEVICE constexpr par_t() : thrust::system::omp::detail::execution_policy() {} }; diff --git a/thrust/system/omp/execution_policy.h 
b/thrust/system/omp/execution_policy.h index c027d6be6..aa1555223 100644 --- a/thrust/system/omp/execution_policy.h +++ b/thrust/system/omp/execution_policy.h @@ -125,7 +125,7 @@ struct tag : thrust::system::omp::execution_policy { unspecified }; * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); diff --git a/thrust/system/tbb/detail/par.h b/thrust/system/tbb/detail/par.h index 496253193..aaaa9159f 100644 --- a/thrust/system/tbb/detail/par.h +++ b/thrust/system/tbb/detail/par.h @@ -33,7 +33,7 @@ struct par_t : thrust::system::tbb::detail::execution_policy, thrust::detail::allocator_aware_execution_policy< thrust::system::tbb::detail::execution_policy> { - __host__ __device__ + THRUST_HOST_DEVICE constexpr par_t() : thrust::system::tbb::detail::execution_policy() {} }; diff --git a/thrust/system/tbb/execution_policy.h b/thrust/system/tbb/execution_policy.h index bfa6b7893..6336587b2 100644 --- a/thrust/system/tbb/execution_policy.h +++ b/thrust/system/tbb/execution_policy.h @@ -125,7 +125,7 @@ struct tag : thrust::system::tbb::execution_policy { unspecified }; * * struct printf_functor * { - * __host__ __device__ + * THRUST_HOST_DEVICE * void operator()(int x) * { * printf("%d\n", x); diff --git a/thrust/tabulate.h b/thrust/tabulate.h index 7cb794550..6b924b9f9 100644 --- a/thrust/tabulate.h +++ b/thrust/tabulate.h @@ -71,7 +71,7 @@ THRUST_NAMESPACE_BEGIN * \see thrust::sequence */ template -__host__ __device__ +THRUST_HOST_DEVICE void tabulate(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, diff --git a/thrust/transform.h b/thrust/transform.h index 2d064c13b..702259ca3 100644 --- a/thrust/transform.h +++ b/thrust/transform.h @@ -86,7 +86,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, OutputIterator result, @@ -201,7 
+201,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, @@ -315,7 +315,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, ForwardIterator result, @@ -387,7 +387,7 @@ __host__ __device__ * * struct is_odd * { - * __host__ __device__ + * THRUST_HOST_DEVICE * bool operator()(int x) * { * return x % 2; @@ -482,7 +482,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first, InputIterator1 last, InputIterator2 stencil, @@ -630,7 +630,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, diff --git a/thrust/transform_reduce.h b/thrust/transform_reduce.h index 11d6b84c3..1722c183f 100644 --- a/thrust/transform_reduce.h +++ b/thrust/transform_reduce.h @@ -80,7 +80,7 @@ THRUST_NAMESPACE_BEGIN * template * struct absolute_value : public unary_function * { - * __host__ __device__ T operator()(const T &x) const + * THRUST_HOST_DEVICE T operator()(const T &x) const * { * return x < T(0) ? -x : x; * } @@ -105,7 +105,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -155,7 +155,7 @@ __host__ __device__ * template * struct absolute_value : public unary_function * { - * __host__ __device__ T operator()(const T &x) const + * THRUST_HOST_DEVICE T operator()(const T &x) const * { * return x < T(0) ? 
-x : x; * } diff --git a/thrust/transform_scan.h b/thrust/transform_scan.h index 4b86c0081..86a400422 100644 --- a/thrust/transform_scan.h +++ b/thrust/transform_scan.h @@ -103,7 +103,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -240,7 +240,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, diff --git a/thrust/tuple.h b/thrust/tuple.h index 6e9127384..6f1ca3441 100644 --- a/thrust/tuple.h +++ b/thrust/tuple.h @@ -47,22 +47,22 @@ THRUST_NAMESPACE_BEGIN // define null_type for backwards compatability struct null_type {}; -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator==(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>=(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<=(const null_type&, const null_type&) { return true; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator!=(const null_type&, const null_type&) { return false; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<(const null_type&, const null_type&) { return false; } -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>(const null_type&, const null_type&) { return false; } /*! 
\endcond @@ -255,7 +255,7 @@ template struct tuple_size; * \see tuple */ template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type >::non_const_type @@ -286,7 +286,7 @@ get(detail::cons& t); * \see tuple */ template -__host__ __device__ +THRUST_HOST_DEVICE inline typename access_traits< typename tuple_element >::type >::const_type @@ -355,14 +355,14 @@ template ::parameter_type t0) : inherited(t0, static_cast(null_type()), @@ -381,7 +381,7 @@ template ::parameter_type t0, typename access_traits::parameter_type t1) : inherited(t0, t1, @@ -397,7 +397,7 @@ template ::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2) @@ -410,7 +410,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -423,7 +423,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -436,7 +436,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -449,7 +449,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -462,7 +462,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename 
access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -475,7 +475,7 @@ template (null_type()), static_cast(null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -488,7 +488,7 @@ template (null_type())) {} - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(typename access_traits::parameter_type t0, typename access_traits::parameter_type t1, typename access_traits::parameter_type t2, @@ -503,12 +503,12 @@ template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple(const detail::cons& p) : inherited(p) {} - __thrust_exec_check_disable__ + THRUST_EXEC_CHECK_DISABLE template - inline __host__ __device__ + inline THRUST_HOST_DEVICE tuple& operator=(const detail::cons& k) { inherited::operator=(k); @@ -521,9 +521,9 @@ template - __host__ __device__ inline + THRUST_HOST_DEVICE inline tuple& operator=(const thrust::pair& k) { //BOOST_STATIC_ASSERT(length::value == 2);// check_length = 2 this->head = k.first; @@ -535,7 +535,7 @@ template tuple with which to swap. */ - inline __host__ __device__ + inline THRUST_HOST_DEVICE void swap(tuple &t) { inherited::swap(t); @@ -564,7 +564,7 @@ class tuple -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0); @@ -580,7 +580,7 @@ __host__ __device__ inline * for brevity. */ template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1); @@ -591,7 +591,7 @@ __host__ __device__ inline * \return A \p tuple object with one member which is a reference to \p t0. */ template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0& t0); /*! This version of \p tie creates a new \c tuple of references object which @@ -606,7 +606,7 @@ tuple tie(T0& t0); * brevity. 
*/ template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0& t0, T1& t1); /*! \p swap swaps the contents of two tuples. @@ -618,7 +618,7 @@ template< typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 > -inline __host__ __device__ +inline THRUST_HOST_DEVICE void swap(tuple &x, tuple &y); @@ -628,94 +628,94 @@ void swap(tuple &x, */ template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8); template -__host__ __device__ 
inline +THRUST_HOST_DEVICE inline typename detail::make_tuple_mapper::type make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8); template -__host__ __device__ inline +THRUST_HOST_DEVICE inline tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator==(const null_type&, const null_type&); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>=(const null_type&, const null_type&); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<=(const null_type&, const null_type&); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator!=(const null_type&, const null_type&); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator<(const null_type&, const null_type&); -__host__ __device__ inline +THRUST_HOST_DEVICE inline bool operator>(const null_type&, const null_type&); /*! 
\endcond diff --git a/thrust/type_traits/integer_sequence.h b/thrust/type_traits/integer_sequence.h index d662ed8f2..fb57f8dcb 100644 --- a/thrust/type_traits/integer_sequence.h +++ b/thrust/type_traits/integer_sequence.h @@ -67,7 +67,7 @@ struct integer_sequence using value_type = T; using size_type = std::size_t; - __host__ __device__ + THRUST_HOST_DEVICE static constexpr size_type size() noexcept { return sizeof...(Is); diff --git a/thrust/type_traits/is_contiguous_iterator.h b/thrust/type_traits/is_contiguous_iterator.h index abdd8d20f..437b9079b 100644 --- a/thrust/type_traits/is_contiguous_iterator.h +++ b/thrust/type_traits/is_contiguous_iterator.h @@ -232,7 +232,7 @@ using contiguous_iterator_raw_pointer_t = // Converts a contiguous iterator to a raw pointer: template -__host__ __device__ +THRUST_HOST_DEVICE contiguous_iterator_raw_pointer_t contiguous_iterator_raw_pointer_cast(Iterator it) { @@ -249,7 +249,7 @@ struct try_unwrap_contiguous_iterator_impl { using type = Iterator; - static __host__ __device__ type get(Iterator it) { return it; } + static THRUST_HOST_DEVICE type get(Iterator it) { return it; } }; // Implementation for contiguous iterators -- unwraps to raw pointer. @@ -258,7 +258,7 @@ struct try_unwrap_contiguous_iterator_impl { using type = contiguous_iterator_raw_pointer_t; - static __host__ __device__ type get(Iterator it) + static THRUST_HOST_DEVICE type get(Iterator it) { return contiguous_iterator_raw_pointer_cast(it); } @@ -271,7 +271,7 @@ using try_unwrap_contiguous_iterator_return_t = // Casts to a raw pointer if iterator is marked as contiguous, otherwise returns // the input iterator. 
template -__host__ __device__ +THRUST_HOST_DEVICE try_unwrap_contiguous_iterator_return_t try_unwrap_contiguous_iterator(Iterator it) { diff --git a/thrust/type_traits/is_trivially_relocatable.h b/thrust/type_traits/is_trivially_relocatable.h index 21d1f09d8..999f1f974 100644 --- a/thrust/type_traits/is_trivially_relocatable.h +++ b/thrust/type_traits/is_trivially_relocatable.h @@ -294,8 +294,6 @@ THRUST_NAMESPACE_END #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA -#include - THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE(char1) THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE(char2) THRUST_PROCLAIM_TRIVIALLY_RELOCATABLE(char3) diff --git a/thrust/uninitialized_copy.h b/thrust/uninitialized_copy.h index 94c2763e3..a136c52c2 100644 --- a/thrust/uninitialized_copy.h +++ b/thrust/uninitialized_copy.h @@ -69,7 +69,7 @@ THRUST_NAMESPACE_BEGIN * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -92,7 +92,7 @@ THRUST_NAMESPACE_BEGIN * \see \c device_malloc */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -131,7 +131,7 @@ __host__ __device__ * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -198,7 +198,7 @@ template * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -222,7 +222,7 @@ template * \see \c device_malloc */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, InputIterator first, Size n, @@ -262,7 +262,7 @@ __host__ __device__ * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; diff --git a/thrust/uninitialized_fill.h b/thrust/uninitialized_fill.h index 486a04158..3a41b33e5 100644 --- a/thrust/uninitialized_fill.h +++ 
b/thrust/uninitialized_fill.h @@ -64,7 +64,7 @@ THRUST_NAMESPACE_BEGIN * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -87,7 +87,7 @@ THRUST_NAMESPACE_BEGIN * \see \c device_malloc */ template -__host__ __device__ +THRUST_HOST_DEVICE void uninitialized_fill(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -120,7 +120,7 @@ __host__ __device__ * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -180,7 +180,7 @@ template * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; @@ -203,7 +203,7 @@ template * \see \c device_malloc */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, ForwardIterator first, Size n, @@ -237,7 +237,7 @@ __host__ __device__ * * struct Int * { - * __host__ __device__ + * THRUST_HOST_DEVICE * Int(int x) : val(x) {} * int val; * }; diff --git a/thrust/unique.h b/thrust/unique.h index cb1b2198a..623cddd50 100644 --- a/thrust/unique.h +++ b/thrust/unique.h @@ -76,7 +76,7 @@ THRUST_NAMESPACE_BEGIN */ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); @@ -169,7 +169,7 @@ ForwardIterator unique(ForwardIterator first, template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator unique(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, @@ -276,7 +276,7 @@ ForwardIterator unique(ForwardIterator first, template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -385,7 +385,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator unique_copy(const 
thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, @@ -498,7 +498,7 @@ OutputIterator unique_copy(InputIterator first, template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(const thrust::detail::execution_policy_base &exec, ForwardIterator1 keys_first, @@ -619,7 +619,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key(const thrust::detail::execution_policy_base &exec, ForwardIterator1 keys_first, @@ -745,7 +745,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -880,7 +880,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE thrust::pair unique_by_key_copy(const thrust::detail::execution_policy_base &exec, InputIterator1 keys_first, @@ -996,7 +996,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -1041,7 +1041,7 @@ __host__ __device__ */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(const thrust::detail::execution_policy_base &exec, ForwardIterator first, @@ -1082,7 +1082,7 @@ __host__ __device__ */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(ForwardIterator first, ForwardIterator last, @@ -1121,7 +1121,7 @@ __host__ __device__ * \see reduce_by_key_copy */ template -__host__ __device__ +THRUST_HOST_DEVICE typename thrust::iterator_traits::difference_type unique_count(ForwardIterator first, ForwardIterator last); diff --git a/thrust/zip_function.h b/thrust/zip_function.h index 5c6f6ac68..9f147297c 100644 --- a/thrust/zip_function.h +++ b/thrust/zip_function.h @@ -34,16 +34,16 @@ namespace zip_detail { // Add workaround for decltype(auto) on C++11-only compilers: #if THRUST_CPP_DIALECT >= 
2014 -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE decltype(auto) apply_impl(Function&& func, Tuple&& args, index_sequence) { return func(thrust::get(THRUST_FWD(args))...); } template -__host__ __device__ +THRUST_HOST_DEVICE decltype(auto) apply(Function&& func, Tuple&& args) { constexpr auto tuple_size = thrust::tuple_size::type>::value; @@ -52,14 +52,14 @@ decltype(auto) apply(Function&& func, Tuple&& args) #else // THRUST_CPP_DIALECT -__thrust_exec_check_disable__ +THRUST_EXEC_CHECK_DISABLE template -__host__ __device__ +THRUST_HOST_DEVICE auto apply_impl(Function&& func, Tuple&& args, index_sequence) THRUST_DECLTYPE_RETURNS(func(thrust::get(THRUST_FWD(args))...)) template -__host__ __device__ +THRUST_HOST_DEVICE auto apply(Function&& func, Tuple&& args) THRUST_DECLTYPE_RETURNS( apply_impl( @@ -154,7 +154,7 @@ class zip_function { public: /*! Constructs a \p zip_function with the provided function object \p func. */ - __host__ __device__ + THRUST_HOST_DEVICE zip_function(Function func) : func(std::move(func)) {} /*! Applies the N-ary function object to elements of the tuple \p args. 
*/ @@ -162,7 +162,7 @@ class zip_function #if THRUST_CPP_DIALECT >= 2014 template - __host__ __device__ + THRUST_HOST_DEVICE decltype(auto) operator()(Tuple&& args) const { return detail::zip_detail::apply(func, THRUST_FWD(args)); @@ -173,7 +173,7 @@ class zip_function // Can't just use THRUST_DECLTYPE_RETURNS here since we need to use // std::declval for the signature components: template - __host__ __device__ + THRUST_HOST_DEVICE auto operator()(Tuple&& args) const noexcept(noexcept(detail::zip_detail::apply(std::declval(), THRUST_FWD(args)))) THRUST_TRAILING_RETURN(decltype(detail::zip_detail::apply(std::declval(), THRUST_FWD(args)))) @@ -195,7 +195,7 @@ class zip_function * \see zip_function */ template -__host__ __device__ +THRUST_HOST_DEVICE zip_function::type> make_zip_function(Function&& fun) { From a7a5d20ffc367950556c5c6d23ae20ea78705f30 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 11:23:28 +0000 Subject: [PATCH 03/44] Add missing overloads for thrust::pow --- testing/complex.cu | 35 +++++++++++++++++++++++++++++++++-- thrust/complex.h | 30 ++++++++++++++++-------------- thrust/detail/complex/cpow.h | 6 ++++-- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/testing/complex.cu b/testing/complex.cu index cc6ef0e45..89fe42fdc 100644 --- a/testing/complex.cu +++ b/testing/complex.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -453,17 +454,18 @@ struct TestComplexBasicArithmetic // Test the basic arithmetic functions against std ASSERT_ALMOST_EQUAL(thrust::abs(a), std::abs(b)); - ASSERT_ALMOST_EQUAL(thrust::arg(a), std::arg(b)); - ASSERT_ALMOST_EQUAL(thrust::norm(a), std::norm(b)); ASSERT_EQUAL(thrust::conj(a), std::conj(b)); + static_assert(std::is_same, decltype(thrust::conj(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::polar(data[0], data[1]), std::polar(data[0], data[1])); + static_assert(std::is_same, decltype(thrust::polar(data[0], data[1]))>::value, ""); // random_samples 
does not seem to produce infinities so proj(z) == z ASSERT_EQUAL(thrust::proj(a), a); + static_assert(std::is_same, decltype(thrust::proj(a))>::value, ""); } }; SimpleUnitTest TestComplexBasicArithmeticInstance; @@ -560,6 +562,9 @@ struct TestComplexExponentialFunctions ASSERT_ALMOST_EQUAL(thrust::exp(a), std::exp(b)); ASSERT_ALMOST_EQUAL(thrust::log(a), std::log(b)); ASSERT_ALMOST_EQUAL(thrust::log10(a), std::log10(b)); + static_assert(std::is_same, decltype(thrust::exp(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::log(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::log10(a))>::value, ""); } }; SimpleUnitTest @@ -579,16 +584,24 @@ struct TestComplexPowerFunctions const std::complex b_std(b_thrust); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust), std::pow(a_std, b_std)); + static_assert(std::is_same, decltype(thrust::pow(a_thrust, b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust.real()), std::pow(a_std, b_std.real())); + static_assert(std::is_same, decltype(thrust::pow(a_thrust, b_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust.real(), b_thrust), std::pow(a_std.real(), b_std)); + static_assert(std::is_same, decltype(thrust::pow(a_thrust.real(), b_thrust))>::value, ""); + + ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, 4), std::pow(a_std, 4)); + static_assert(std::is_same, decltype(thrust::pow(a_thrust, 4))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::sqrt(a_thrust), std::sqrt(a_std)); + static_assert(std::is_same, decltype(thrust::sqrt(a_thrust))>::value, ""); } // Test power functions with promoted types. 
{ using T0 = T; using T1 = other_floating_point_type_t; + using promoted = typename thrust::detail::promoted_numerical_type::type; thrust::host_vector data = unittest::random_samples(4); @@ -598,11 +611,17 @@ struct TestComplexPowerFunctions const std::complex b_std(data[2], data[3]); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust), std::pow(a_std, b_std)); + static_assert(std::is_same, decltype(thrust::pow(a_thrust, b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust, a_thrust), std::pow(b_std, a_std)); + static_assert(std::is_same, decltype(thrust::pow(b_thrust, a_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust, b_thrust.real()), std::pow(a_std, b_std.real())); + static_assert(std::is_same, decltype(thrust::pow(a_thrust, b_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust, a_thrust.real()), std::pow(b_std, a_std.real())); + static_assert(std::is_same, decltype(thrust::pow(b_thrust, a_thrust.real()))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(a_thrust.real(), b_thrust), std::pow(a_std.real(), b_std)); + static_assert(std::is_same, decltype(thrust::pow(a_thrust.real(), b_thrust))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::pow(b_thrust.real(), a_thrust), std::pow(b_std.real(), a_std)); + static_assert(std::is_same, decltype(thrust::pow(b_thrust.real(), a_thrust))>::value, ""); } } }; @@ -621,20 +640,32 @@ struct TestComplexTrigonometricFunctions ASSERT_ALMOST_EQUAL(thrust::cos(a), std::cos(c)); ASSERT_ALMOST_EQUAL(thrust::sin(a), std::sin(c)); ASSERT_ALMOST_EQUAL(thrust::tan(a), std::tan(c)); + static_assert(std::is_same, decltype(thrust::cos(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::sin(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::tan(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::cosh(a), std::cosh(c)); ASSERT_ALMOST_EQUAL(thrust::sinh(a), std::sinh(c)); ASSERT_ALMOST_EQUAL(thrust::tanh(a), std::tanh(c)); + static_assert(std::is_same, 
decltype(thrust::cosh(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::sinh(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::tanh(a))>::value, ""); #if THRUST_CPP_DIALECT >= 2011 ASSERT_ALMOST_EQUAL(thrust::acos(a), std::acos(c)); ASSERT_ALMOST_EQUAL(thrust::asin(a), std::asin(c)); ASSERT_ALMOST_EQUAL(thrust::atan(a), std::atan(c)); + static_assert(std::is_same, decltype(thrust::acos(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::asin(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::atan(a))>::value, ""); ASSERT_ALMOST_EQUAL(thrust::acosh(a), std::acosh(c)); ASSERT_ALMOST_EQUAL(thrust::asinh(a), std::asinh(c)); ASSERT_ALMOST_EQUAL(thrust::atanh(a), std::atanh(c)); + static_assert(std::is_same, decltype(thrust::acosh(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::asinh(a))>::value, ""); + static_assert(std::is_same, decltype(thrust::atanh(a))>::value, ""); #endif } diff --git a/thrust/complex.h b/thrust/complex.h index 6d8425fa3..f95cace86 100644 --- a/thrust/complex.h +++ b/thrust/complex.h @@ -23,11 +23,13 @@ #pragma once #include +#include #include #include #include -#include +#include + #if THRUST_CPP_DIALECT >= 2011 # define THRUST_STD_COMPLEX_REAL(z) \ @@ -38,7 +40,7 @@ reinterpret_cast< \ const typename thrust::detail::remove_reference::type::value_type (&)[2] \ >(z)[1] -# define THRUST_STD_COMPLEX_DEVICE THRUST_DEVICE +# define THRUST_STD_COMPLEX_DEVICE __device__ #else # define THRUST_STD_COMPLEX_REAL(z) (z).real() # define THRUST_STD_COMPLEX_IMAG(z) (z).imag() @@ -221,7 +223,7 @@ struct complex * * \param z The \p complex to copy from. */ - THRUST_HOST THRUST_STD_COMPLEX_DEVICE + __host__ THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); /*! This converting copy constructor copies from a std::complex with @@ -232,7 +234,7 @@ struct complex * \tparam U is convertible to \c value_type. 
*/ template - THRUST_HOST THRUST_STD_COMPLEX_DEVICE + __host__ THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); @@ -280,7 +282,7 @@ struct complex * * \param z The \p complex to copy from. */ - THRUST_HOST THRUST_STD_COMPLEX_DEVICE + __host__ THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this @@ -291,7 +293,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - THRUST_HOST THRUST_STD_COMPLEX_DEVICE + __host__ THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); @@ -453,7 +455,7 @@ struct complex /*! Casts this \p complex to a std::complex of the same type. */ - THRUST_HOST + __host__ operator std::complex() const { return std::complex(real(), imag()); } private: @@ -749,7 +751,7 @@ pow(const complex& x, const complex& y); * \param x The base. * \param y The exponent. */ -template +template ::value, int> = 0> THRUST_HOST_DEVICE complex::type> pow(const complex& x, const T1& y); @@ -762,7 +764,7 @@ pow(const complex& x, const T1& y); * \param x The base. * \param y The exponent. */ -template +template ::value, int> = 0> THRUST_HOST_DEVICE complex::type> pow(const T0& x, const complex& y); @@ -930,7 +932,7 @@ operator<<(std::basic_ostream& os, const complex& z); * \param z The \p complex number to set. */ template -THRUST_HOST +__host__ std::basic_istream& operator>>(std::basic_istream& is, complex& z); @@ -953,7 +955,7 @@ bool operator==(const complex& x, const complex& y); * \param y The second \p complex. */ template -THRUST_HOST THRUST_STD_COMPLEX_DEVICE +__host__ THRUST_STD_COMPLEX_DEVICE bool operator==(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are equal and false otherwise. @@ -962,7 +964,7 @@ bool operator==(const complex& x, const std::complex& y); * \param y The second \p complex. 
*/ template -THRUST_HOST THRUST_STD_COMPLEX_DEVICE +__host__ THRUST_STD_COMPLEX_DEVICE bool operator==(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is zero and @@ -1000,7 +1002,7 @@ bool operator!=(const complex& x, const complex& y); * \param y The second \p complex. */ template -THRUST_HOST THRUST_STD_COMPLEX_DEVICE +__host__ THRUST_STD_COMPLEX_DEVICE bool operator!=(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are different and false otherwise. @@ -1009,7 +1011,7 @@ bool operator!=(const complex& x, const std::complex& y); * \param y The second \p complex. */ template -THRUST_HOST THRUST_STD_COMPLEX_DEVICE +__host__ THRUST_STD_COMPLEX_DEVICE bool operator!=(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is not zero or diff --git a/thrust/detail/complex/cpow.h b/thrust/detail/complex/cpow.h index f7f5096ed..63cc20b3f 100644 --- a/thrust/detail/complex/cpow.h +++ b/thrust/detail/complex/cpow.h @@ -22,7 +22,9 @@ #include #include + #include +#include THRUST_NAMESPACE_BEGIN @@ -35,7 +37,7 @@ pow(const complex& x, const complex& y) return exp(log(complex(x)) * complex(y)); } -template +template ::value, int>> THRUST_HOST_DEVICE complex::type> pow(const complex& x, const T1& y) @@ -44,7 +46,7 @@ pow(const complex& x, const T1& y) return exp(log(complex(x)) * T(y)); } -template +template ::value, int>> THRUST_HOST_DEVICE complex::type> pow(const T0& x, const complex& y) From 7006599832b22c4ea57a141d690140a08ec9bb58 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 11:37:55 +0000 Subject: [PATCH 04/44] Refactors thrust::unique_by_key to use cub::DeviceSelect::UniqueByKey --- testing/unique_by_key.cu | 74 ++ thrust/system/cuda/detail/unique_by_key.h | 1044 +++++---------------- 2 files changed, 287 insertions(+), 831 deletions(-) diff --git a/testing/unique_by_key.cu 
b/testing/unique_by_key.cu index 76073e0ca..ec17ef63f 100644 --- a/testing/unique_by_key.cu +++ b/testing/unique_by_key.cu @@ -4,6 +4,22 @@ #include #include +template +struct index_to_value_t +{ + template + __host__ __device__ __forceinline__ ValueT operator()(IndexT index) + { + if (static_cast(index) == 4300000000ULL) + { + return static_cast(1); + } + else + { + return static_cast(0); + } + } +}; template @@ -337,6 +353,8 @@ struct TestUniqueCopyByKey }; VariableUnitTest TestUniqueCopyByKeyInstance; + + template struct TestUniqueCopyByKeyToDiscardIterator { @@ -436,3 +454,59 @@ struct TestUniqueCopyByKeyToDiscardIterator }; VariableUnitTest TestUniqueCopyByKeyToDiscardIteratorInstance; +template +struct TestUniqueCopyByKeyLargeInput +{ + void operator()() + { + using type = K; + using index_type = std::int64_t; + + const std::size_t num_items = 4400000000ULL; + thrust::host_vector reference_keys{static_cast(0), static_cast(1), static_cast(0)}; + thrust::host_vector reference_values{0, 4300000000ULL, 4300000001ULL}; + + auto keys_in = thrust::make_transform_iterator(thrust::make_counting_iterator(0ULL), index_to_value_t{}); + auto values_in = thrust::make_counting_iterator(0ULL); + thrust::device_vector keys_out(reference_keys.size()); + thrust::device_vector values_out(reference_values.size()); + + // Run test + const auto selected_aut_end = thrust::unique_by_key_copy( + keys_in, keys_in + num_items, values_in, keys_out.begin(), values_out.begin()); + + // Ensure that we created the correct output + auto const num_selected_out = thrust::distance(keys_out.begin(), selected_aut_end.first); + ASSERT_EQUAL(reference_keys.size(), static_cast(num_selected_out)); + ASSERT_EQUAL(num_selected_out, thrust::distance(values_out.begin(), selected_aut_end.second)); + keys_out.resize(num_selected_out); + values_out.resize(num_selected_out); + ASSERT_EQUAL(reference_keys, keys_out); + ASSERT_EQUAL(reference_values, values_out); + } +}; +SimpleUnitTest 
TestUniqueCopyByKeyLargeInputInstance; + +template +struct TestUniqueCopyByKeyLargeOutCount +{ + void operator()() + { + constexpr std::size_t num_items = 4400000000ULL; + + auto keys_in = thrust::make_counting_iterator(0ULL); + auto values_in = thrust::make_counting_iterator(0ULL); + + // Run test + auto keys_out = thrust::make_discard_iterator(); + auto values_out = thrust::make_discard_iterator(); + const auto selected_aut_end = thrust::unique_by_key_copy(thrust::device, + keys_in, keys_in + num_items, values_in, keys_out, values_out); + + // Ensure that we created the correct output + auto const num_selected_out = thrust::distance(keys_out, selected_aut_end.first); + ASSERT_EQUAL(num_items, static_cast(num_selected_out)); + ASSERT_EQUAL(num_selected_out, thrust::distance(values_out, selected_aut_end.second)); + } +}; +SimpleUnitTest TestUniqueCopyByKeyLargeOutCountInstance; diff --git a/thrust/system/cuda/detail/unique_by_key.h b/thrust/system/cuda/detail/unique_by_key.h index be8ebc865..b2f2bfbe3 100644 --- a/thrust/system/cuda/detail/unique_by_key.h +++ b/thrust/system/cuda/detail/unique_by_key.h @@ -30,890 +30,272 @@ #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include THRUST_NAMESPACE_BEGIN -template -_CCCL_HOST_DEVICE thrust::pair -unique_by_key( - const thrust::detail::execution_policy_base &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first); +template +_CCCL_HOST_DEVICE thrust::pair unique_by_key( + const thrust::detail::execution_policy_base& exec, + ForwardIterator1 keys_first, + ForwardIterator1 keys_last, + ForwardIterator2 values_first); template 
-_CCCL_HOST_DEVICE thrust::pair -unique_by_key_copy( - const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -namespace cuda_cub { - -// XXX it should be possible to unify unique & unique_by_key into a single -// agent with various specializations, similar to what is done -// with partition -namespace __unique_by_key { - - template - struct PtxPolicy - { - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD, - }; - static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const cub::BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; - }; // struct PtxPolicy - - template - struct Tuning; - - namespace mpl = thrust::detail::mpl::math; - - template - struct items_per_thread - { - enum - { - value = mpl::min< - int, - static_cast(NOMINAL_4B_ITEMS_PER_THREAD), - mpl::max(NOMINAL_4B_ITEMS_PER_THREAD * 4 / - sizeof(T))>::value>::value - }; - }; - - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 11, - // - ITEMS_PER_THREAD = items_per_thread::value - }; - - typedef PtxPolicy<64, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_LDG, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning for sm52 - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - // - ITEMS_PER_THREAD = items_per_thread::value - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_LDG, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning for sm35 - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - // - 
ITEMS_PER_THREAD = items_per_thread::value - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_DEFAULT, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning for sm30 - - template - struct UniqueByKeyAgent - { - typedef typename iterator_traits::value_type key_type; - typedef typename iterator_traits::value_type value_type; - - typedef cub::ScanTileState ScanTileState; - - template - struct PtxPlan : Tuning::type - { - typedef Tuning tuning; - - typedef typename core::LoadIterator::type KeyLoadIt; - typedef typename core::LoadIterator::type ValLoadIt; - - typedef typename core::BlockLoad::type BlockLoadKeys; - typedef typename core::BlockLoad::type BlockLoadValues; - - typedef cub::BlockDiscontinuity - BlockDiscontinuityKeys; - - typedef cub::TilePrefixCallbackOp - TilePrefixCallback; - typedef cub::BlockScan - BlockScan; - - typedef core::uninitialized_array - shared_keys_t; - typedef core::uninitialized_array - shared_values_t; - - union TempStorage - { - struct ScanStorage - { - typename BlockScan::TempStorage scan; - typename TilePrefixCallback::TempStorage prefix; - typename BlockDiscontinuityKeys::TempStorage discontinuity; - } scan_storage; - - typename BlockLoadKeys::TempStorage load_keys; - typename BlockLoadValues::TempStorage load_values; - - shared_keys_t shared_keys; - shared_values_t shared_values; - }; // union TempStorage - }; // struct PtxPlan - - typedef typename core::specialize_plan_msvc10_war::type::type ptx_plan; - - typedef typename ptx_plan::KeyLoadIt KeyLoadIt; - typedef typename ptx_plan::ValLoadIt ValLoadIt; - typedef typename ptx_plan::BlockLoadKeys BlockLoadKeys; - typedef typename ptx_plan::BlockLoadValues BlockLoadValues; - typedef typename ptx_plan::BlockDiscontinuityKeys BlockDiscontinuityKeys; - typedef typename ptx_plan::TilePrefixCallback TilePrefixCallback; - typedef typename ptx_plan::BlockScan BlockScan; - typedef typename ptx_plan::TempStorage TempStorage; - typedef typename 
ptx_plan::shared_keys_t shared_keys_t; - typedef typename ptx_plan::shared_values_t shared_values_t; - - enum - { - BLOCK_THREADS = ptx_plan::BLOCK_THREADS, - ITEMS_PER_THREAD = ptx_plan::ITEMS_PER_THREAD, - ITEMS_PER_TILE = ptx_plan::ITEMS_PER_TILE - }; - - struct impl - { - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - TempStorage & temp_storage; - ScanTileState & tile_state; - KeyLoadIt keys_in; - ValLoadIt values_in; - KeyOutputIt keys_out; - ValOutputIt values_out; - cub::InequalityWrapper predicate; - Size num_items; - - //--------------------------------------------------------------------- - // Utility functions - //--------------------------------------------------------------------- - - struct key_tag {}; - struct value_tag {}; - - THRUST_DEVICE_FUNCTION - shared_keys_t &get_shared(key_tag) - { - return temp_storage.shared_keys; - } - THRUST_DEVICE_FUNCTION - shared_values_t &get_shared(value_tag) - { - return temp_storage.shared_values; - } - - - template - void THRUST_DEVICE_FUNCTION - scatter(Tag tag, - OutputIt items_out, - T (&items)[ITEMS_PER_THREAD], - Size (&selection_flags)[ITEMS_PER_THREAD], - Size (&selection_indices)[ITEMS_PER_THREAD], - int /*num_tile_items*/, - int num_tile_selections, - Size num_selections_prefix, - Size /*num_selections*/) - { - using core::sync_threadblock; - -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - int local_scatter_offset = selection_indices[ITEM] - - num_selections_prefix; - if (selection_flags[ITEM]) - { - get_shared(tag)[local_scatter_offset] = items[ITEM]; - } - } - - sync_threadblock(); - - for (int item = threadIdx.x; - item < num_tile_selections; - item += BLOCK_THREADS) - { - items_out[num_selections_prefix + item] = get_shared(tag)[item]; - } - - sync_threadblock(); - } - - //--------------------------------------------------------------------- - // 
Tile processing - //--------------------------------------------------------------------- - - template - Size THRUST_DEVICE_FUNCTION - consume_tile_impl(int num_tile_items, - int tile_idx, - Size tile_base) - { - using core::sync_threadblock; - - key_type keys[ITEMS_PER_THREAD]; - Size selection_flags[ITEMS_PER_THREAD]; - Size selection_idx[ITEMS_PER_THREAD]; - - if (IS_LAST_TILE) - { - // Fill last elements with the first element - // because collectives are not suffix guarded - BlockLoadKeys(temp_storage.load_keys) - .Load(keys_in + tile_base, - keys, - num_tile_items, - *(keys_in + tile_base)); - } - else - { - BlockLoadKeys(temp_storage.load_keys).Load(keys_in + tile_base, keys); - } - - - sync_threadblock(); - - value_type values[ITEMS_PER_THREAD]; - if (IS_LAST_TILE) - { - // Fill last elements with the first element - // because collectives are not suffix guarded - BlockLoadValues(temp_storage.load_values) - .Load(values_in + tile_base, - values, - num_tile_items, - *(values_in + tile_base)); - } - else - { - BlockLoadValues(temp_storage.load_values) - .Load(values_in + tile_base, values); - } - - sync_threadblock(); - - if (IS_FIRST_TILE) - { - BlockDiscontinuityKeys(temp_storage.scan_storage.discontinuity) - .FlagHeads(selection_flags, keys, predicate); - } - else - { - key_type tile_predecessor = keys_in[tile_base - 1]; - BlockDiscontinuityKeys(temp_storage.scan_storage.discontinuity) - .FlagHeads(selection_flags, keys, predicate, tile_predecessor); - } -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Set selection_flags for out-of-bounds items - if ((IS_LAST_TILE) && (Size(threadIdx.x * ITEMS_PER_THREAD) + ITEM >= num_tile_items)) - selection_flags[ITEM] = 1; - } - - sync_threadblock(); - - - Size num_tile_selections = 0; - Size num_selections = 0; - Size num_selections_prefix = 0; - if (IS_FIRST_TILE) - { - BlockScan(temp_storage.scan_storage.scan) - .ExclusiveSum(selection_flags, - selection_idx, - num_tile_selections); - 
- if (threadIdx.x == 0) - { - // Update tile status if this is not the last tile - if (!IS_LAST_TILE) - tile_state.SetInclusive(0, num_tile_selections); - } - - // Do not count any out-of-bounds selections - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - } - num_selections = num_tile_selections; - } - else - { - TilePrefixCallback prefix_cb(tile_state, - temp_storage.scan_storage.prefix, - cub::Sum(), - tile_idx); - BlockScan(temp_storage.scan_storage.scan) - .ExclusiveSum(selection_flags, - selection_idx, - prefix_cb); - - num_selections = prefix_cb.GetInclusivePrefix(); - num_tile_selections = prefix_cb.GetBlockAggregate(); - num_selections_prefix = prefix_cb.GetExclusivePrefix(); - - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - num_selections -= num_discount; - } - } - - sync_threadblock(); - - scatter(key_tag(), - keys_out, - keys, - selection_flags, - selection_idx, - num_tile_items, - num_tile_selections, - num_selections_prefix, - num_selections); - - sync_threadblock(); - - scatter(value_tag(), - values_out, - values, - selection_flags, - selection_idx, - num_tile_items, - num_tile_selections, - num_selections_prefix, - num_selections); - - return num_selections; - } - - - template - Size THRUST_DEVICE_FUNCTION - consume_tile(int num_tile_items, - int tile_idx, - Size tile_base) - { - if (tile_idx == 0) - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - else - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - } - - //--------------------------------------------------------------------- - // Constructor - //--------------------------------------------------------------------- - - THRUST_DEVICE_FUNCTION - impl(TempStorage & temp_storage_, - ScanTileState & tile_state_, - KeyLoadIt keys_in_, - ValLoadIt values_in_, - KeyOutputIt keys_out_, - ValOutputIt values_out_, 
- BinaryPred binary_pred_, - Size num_items_, - int num_tiles, - NumSelectedOutIt num_selected_out) - // filed ctors - : temp_storage(temp_storage_), - tile_state(tile_state_), - keys_in(keys_in_), - values_in(values_in_), - keys_out(keys_out_), - values_out(values_out_), - predicate(binary_pred_), - num_items(num_items_) - { - int tile_idx = blockIdx.x; - Size tile_base = tile_idx * ITEMS_PER_TILE; - - if (tile_idx < num_tiles - 1) - { - consume_tile(ITEMS_PER_TILE, - tile_idx, - tile_base); - } - else - { - int num_remaining = static_cast(num_items - tile_base); - Size num_selections = consume_tile(num_remaining, - tile_idx, - tile_base); - if (threadIdx.x == 0) - { - *num_selected_out = num_selections; - } - } - } - }; // struct impl - - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- - - THRUST_AGENT_ENTRY(KeyInputIt keys_in, - ValInputIt values_in, - KeyOutputIt keys_out, - ValOutputIt values_out, - BinaryPred binary_pred, - NumSelectedOutIt num_selected_out, - Size num_items, - ScanTileState tile_state, - int num_tiles, - char * shmem) - { - TempStorage &storage = *reinterpret_cast(shmem); - - impl(storage, - tile_state, - core::make_load_iterator(ptx_plan(), keys_in), - core::make_load_iterator(ptx_plan(), values_in), - keys_out, - values_out, - binary_pred, - num_items, - num_tiles, - num_selected_out); - } - }; // struct UniqueByKeyAgent +_CCCL_HOST_DEVICE thrust::pair unique_by_key_copy( + const thrust::detail::execution_policy_base& exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_result, + OutputIterator2 values_result); + +namespace cuda_cub +{ +namespace detail +{ - template - struct InitAgent +template +struct DispatchUniqueByKey +{ + static cudaError_t THRUST_RUNTIME_FUNCTION dispatch( + execution_policy& policy, + void* d_temp_storage, + size_t& 
temp_storage_bytes, + KeyInputIt keys_in, + ValInputIt values_in, + KeyOutputIt keys_out, + ValOutputIt values_out, + OffsetT num_items, + BinaryPred binary_pred, + pair& result_end) { - template - struct PtxPlan : PtxPolicy<128> {}; - - typedef core::specialize_plan ptx_plan; + cudaError_t status = cudaSuccess; + cudaStream_t stream = cuda_cub::stream(policy); + size_t allocation_sizes[2] = {0, sizeof(OffsetT)}; + void* allocations[2] = {nullptr, nullptr}; + + // Query algorithm memory requirements + status = cub::DeviceSelect::UniqueByKey( + nullptr, + allocation_sizes[0], + keys_in, + values_in, + keys_out, + values_out, + static_cast(nullptr), + num_items, + stream); + CUDA_CUB_RET_IF_FAIL(status); - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- + status = cub::AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes); + CUDA_CUB_RET_IF_FAIL(status); - THRUST_AGENT_ENTRY(ScanTileState tile_state, - Size num_tiles, - NumSelectedIt num_selected_out, - char * /*shmem*/) + // Return if we're only querying temporary storage requirements + if (d_temp_storage == nullptr) { - tile_state.InitializeStatus(num_tiles); - if (blockIdx.x == 0 && threadIdx.x == 0) - *num_selected_out = 0; + return status; } - }; // struct InitAgent - - - template - static cudaError_t THRUST_RUNTIME_FUNCTION - doit_step(void * d_temp_storage, - size_t & temp_storage_bytes, - KeyInputIt keys_in, - ValInputIt values_in, - KeyOutputIt keys_out, - ValOutputIt values_out, - BinaryPred binary_pred, - NumSelectedOutIt num_selected_out, - Size num_items, - cudaStream_t stream) - { - using core::AgentLauncher; - using core::AgentPlan; - using core::get_agent_plan; - - typedef AgentLauncher< - UniqueByKeyAgent > - unique_agent; - - typedef typename unique_agent::ScanTileState ScanTileState; - - typedef AgentLauncher< - InitAgent > - init_agent; - - using 
core::get_plan; - typename get_plan::type init_plan = init_agent::get_plan(); - typename get_plan::type unique_plan = unique_agent::get_plan(stream); - - - int tile_size = unique_plan.items_per_tile; - size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size); - - size_t vshmem_size = core::vshmem_size(unique_plan.shared_memory_size, - num_tiles); - - cudaError_t status = cudaSuccess; - size_t allocation_sizes[2] = {0, vshmem_size}; - status = ScanTileState::AllocationSize(static_cast(num_tiles), allocation_sizes[0]); - CUDA_CUB_RET_IF_FAIL(status); - - void *allocations[2] = {NULL, NULL}; - // - status = cub::AliasTemporaries(d_temp_storage, - temp_storage_bytes, - allocations, - allocation_sizes); - CUDA_CUB_RET_IF_FAIL(status); - - if (d_temp_storage == NULL) + // Return for empty problems + if (num_items == 0) { + result_end = thrust::make_pair(keys_out, values_out); return status; } - ScanTileState tile_status; - status = tile_status.Init(static_cast(num_tiles), allocations[0], allocation_sizes[0]); + // Memory allocation for the number of selected output items + OffsetT* d_num_selected_out = thrust::detail::aligned_reinterpret_cast(allocations[1]); + + // Run algorithm + status = cub::DeviceSelect::UniqueByKey( + allocations[0], + allocation_sizes[0], + keys_in, + values_in, + keys_out, + values_out, + d_num_selected_out, + num_items, + binary_pred, + stream); CUDA_CUB_RET_IF_FAIL(status); - num_tiles = max(1,num_tiles); - init_agent ia(init_plan, num_tiles, stream, "unique_by_key::init_agent"); - ia.launch(tile_status, num_tiles, num_selected_out); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); - - if (num_items == 0) { return status; } - - char *vshmem_ptr = vshmem_size > 0 ? 
(char *)allocations[1] : NULL; - - unique_agent ua(unique_plan, num_items, stream, vshmem_ptr, "unique_by_key::unique_agent"); - ua.launch(keys_in, - values_in, - keys_out, - values_out, - binary_pred, - num_selected_out, - num_items, - tile_status, - num_tiles); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); - return status; - } - - template - THRUST_RUNTIME_FUNCTION - pair - unique_by_key(execution_policy& policy, - KeyInputIt keys_first, - KeyInputIt keys_last, - ValInputIt values_first, - KeyOutputIt keys_result, - ValOutputIt values_result, - BinaryPred binary_pred) - { - - typedef int size_type; - - size_type num_items - = static_cast(thrust::distance(keys_first, keys_last)); - - size_t temp_storage_bytes = 0; - cudaStream_t stream = cuda_cub::stream(policy); - - cudaError_t status; - status = __unique_by_key::doit_step(NULL, - temp_storage_bytes, - keys_first, - values_first, - keys_result, - values_result, - binary_pred, - reinterpret_cast(NULL), - num_items, - stream); - cuda_cub::throw_on_error(status, "unique_by_key: failed on 1st step"); - - size_t allocation_sizes[2] = {sizeof(size_type), temp_storage_bytes}; - void * allocations[2] = {NULL, NULL}; - - size_t storage_size = 0; - status = core::alias_storage(NULL, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "unique_by_key failed on 1st alias_storage"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array - tmp(policy, storage_size); - void *ptr = static_cast(tmp.data().get()); - - status = core::alias_storage(ptr, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "unique_by_key failed on 2nd alias_storage"); - - size_type* d_num_selected_out - = thrust::detail::aligned_reinterpret_cast(allocations[0]); - - status = __unique_by_key::doit_step(allocations[1], - temp_storage_bytes, - keys_first, - values_first, - keys_result, - values_result, - binary_pred, - d_num_selected_out, - num_items, - stream); - cuda_cub::throw_on_error(status, "unique_by_key: failed on 2nd step"); - + // Get number of selected items status = cuda_cub::synchronize(policy); - cuda_cub::throw_on_error(status, "unique_by_key: failed to synchronize"); - - size_type num_selected = get_value(policy, d_num_selected_out); + CUDA_CUB_RET_IF_FAIL(status); + OffsetT num_selected = get_value(policy, d_num_selected_out); - return thrust::make_pair( - keys_result + num_selected, - values_result + num_selected - ); + result_end = thrust::make_pair(keys_out + num_selected, values_out + num_selected); + return status; } +}; + +template +THRUST_RUNTIME_FUNCTION pair unique_by_key( + execution_policy& policy, + KeyInputIt keys_first, + KeyInputIt keys_last, + ValInputIt values_first, + KeyOutputIt keys_result, + ValOutputIt values_result, + BinaryPred binary_pred) +{ + using size_type = typename iterator_traits::difference_type; + + size_type num_items = static_cast(thrust::distance(keys_first, keys_last)); + pair result_end{}; + cudaError_t status = cudaSuccess; + size_t temp_storage_bytes = 0; + + // 32-bit offset-type dispatch + using dispatch32_t = + DispatchUniqueByKey; + + // 64-bit offset-type dispatch + using dispatch64_t = + DispatchUniqueByKey; + + // Query temporary storage requirements + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, + nullptr, + 
temp_storage_bytes, + keys_first, + values_first, + keys_result, + values_result, + num_items_fixed, + binary_pred, + result_end)); + cuda_cub::throw_on_error(status, "unique_by_key: failed on 1st step"); + + // Allocate temporary storage. + thrust::detail::temporary_array tmp(policy, temp_storage_bytes); + void* temp_storage = static_cast(tmp.data().get()); + + // Run algorithm + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, + temp_storage, + temp_storage_bytes, + keys_first, + values_first, + keys_result, + values_result, + num_items_fixed, + binary_pred, + result_end)); + cuda_cub::throw_on_error(status, "unique_by_key: failed on 2nd step"); + + return result_end; +} -} // namespace __unique_by_key - +} // namespace detail //------------------------- // Thrust API entry points //------------------------- - - _CCCL_EXEC_CHECK_DISABLE -template -pair _CCCL_HOST_DEVICE -unique_by_key_copy(execution_policy &policy, - KeyInputIt keys_first, - KeyInputIt keys_last, - ValInputIt values_first, - KeyOutputIt keys_result, - ValOutputIt values_result, - BinaryPred binary_pred) +template +pair _CCCL_HOST_DEVICE unique_by_key_copy( + execution_policy& policy, + KeyInputIt keys_first, + KeyInputIt keys_last, + ValInputIt values_first, + KeyOutputIt keys_result, + ValOutputIt values_result, + BinaryPred binary_pred) { auto ret = thrust::make_pair(keys_result, values_result); THRUST_CDP_DISPATCH( - (ret = __unique_by_key::unique_by_key(policy, - keys_first, - keys_last, - values_first, - keys_result, - values_result, - binary_pred);), - (ret = thrust::unique_by_key_copy(cvt_to_seq(derived_cast(policy)), - keys_first, - keys_last, - values_first, - keys_result, - values_result, - binary_pred);)); + (ret = detail::unique_by_key(policy, keys_first, keys_last, values_first, keys_result, values_result, binary_pred);), + (ret = thrust::unique_by_key_copy( + cvt_to_seq(derived_cast(policy)), keys_first, keys_last, 
values_first, keys_result, values_result, binary_pred);)); return ret; } -template -pair _CCCL_HOST_DEVICE -unique_by_key_copy(execution_policy &policy, - KeyInputIt keys_first, - KeyInputIt keys_last, - ValInputIt values_first, - KeyOutputIt keys_result, - ValOutputIt values_result) +template +pair _CCCL_HOST_DEVICE unique_by_key_copy( + execution_policy& policy, + KeyInputIt keys_first, + KeyInputIt keys_last, + ValInputIt values_first, + KeyOutputIt keys_result, + ValOutputIt values_result) { typedef typename iterator_traits::value_type key_type; - return cuda_cub::unique_by_key_copy(policy, - keys_first, - keys_last, - values_first, - keys_result, - values_result, - equal_to()); + return cuda_cub::unique_by_key_copy( + policy, keys_first, keys_last, values_first, keys_result, values_result, equal_to()); } -template -pair _CCCL_HOST_DEVICE -unique_by_key(execution_policy &policy, - KeyInputIt keys_first, - KeyInputIt keys_last, - ValInputIt values_first, - BinaryPred binary_pred) +template +pair _CCCL_HOST_DEVICE unique_by_key( + execution_policy& policy, + KeyInputIt keys_first, + KeyInputIt keys_last, + ValInputIt values_first, + BinaryPred binary_pred) { auto ret = thrust::make_pair(keys_first, values_first); THRUST_CDP_DISPATCH( - (ret = cuda_cub::unique_by_key_copy(policy, - keys_first, - keys_last, - values_first, - keys_first, - values_first, - binary_pred);), - (ret = thrust::unique_by_key(cvt_to_seq(derived_cast(policy)), - keys_first, - keys_last, - values_first, - binary_pred);)); + (ret = cuda_cub::unique_by_key_copy( + policy, keys_first, keys_last, values_first, keys_first, values_first, binary_pred);), + (ret = thrust::unique_by_key(cvt_to_seq(derived_cast(policy)), keys_first, keys_last, values_first, binary_pred);)); return ret; } -template +template pair _CCCL_HOST_DEVICE -unique_by_key(execution_policy &policy, - KeyInputIt keys_first, - KeyInputIt keys_last, - ValInputIt values_first) +unique_by_key(execution_policy& policy, KeyInputIt 
keys_first, KeyInputIt keys_last, ValInputIt values_first) { typedef typename iterator_traits::value_type key_type; - return cuda_cub::unique_by_key(policy, - keys_first, - keys_last, - values_first, - equal_to()); + return cuda_cub::unique_by_key(policy, keys_first, keys_last, values_first, equal_to()); } - - -} // namespace cuda_cub +} // namespace cuda_cub THRUST_NAMESPACE_END -#include -#include +# include +# include #endif From e5dbdaa37346add5a2a5dcc8685e1cfe0a9f16ca Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 11:39:07 +0000 Subject: [PATCH 05/44] Fix a typo in thrust-config.cmake --- thrust/cmake/thrust-config.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thrust/cmake/thrust-config.cmake b/thrust/cmake/thrust-config.cmake index 74f703d08..3613b6bf1 100644 --- a/thrust/cmake/thrust-config.cmake +++ b/thrust/cmake/thrust-config.cmake @@ -357,7 +357,7 @@ function(thrust_debug_internal_targets) thrust_debug_target(OpenMP::OpenMP_CXX "${THRUST_OMP_VERSION}") _thrust_debug_backend_targets(TBB "${THRUST_TBB_VERSION}") - thrust_debug_target(TBB:tbb "${THRUST_TBB_VERSION}") + thrust_debug_target(TBB::tbb "${THRUST_TBB_VERSION}") _thrust_debug_backend_targets(CUDA "CUB ${THRUST_CUB_VERSION}") thrust_debug_target(CUB::CUB "${THRUST_CUB_VERSION}") From bd4301888db337cdde5b56b2484ad32decfed76f Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 11:50:05 +0000 Subject: [PATCH 06/44] Check that thrust::pair is trivially copyable --- test/test_pair.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/test_pair.cpp b/test/test_pair.cpp index 1d9e90a62..112b48dab 100644 --- a/test/test_pair.cpp +++ b/test/test_pair.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,12 +19,20 @@ #include #include +#include #include #include "test_header.hpp" TESTS_DEFINE(PairTests, NumericalTestsParams); +TYPED_TEST(PairTests, TestTriviallyCopyable) +{ + using T = typename TestFixture::input_type; + static_assert(std::is_trivially_copyable>::value, + "thrust::pair is not trivially copyable even though it should be!"); +} + TYPED_TEST(PairTests, TestPairManipulation) { using T = typename TestFixture::input_type; From b7b785ea71ad41fef73d66cb2c9927d4f179d988 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 11:51:23 +0000 Subject: [PATCH 07/44] Remove double ignore in discard_iterator.h docs --- thrust/iterator/discard_iterator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thrust/iterator/discard_iterator.h b/thrust/iterator/discard_iterator.h index af4a94960..8d071e9bb 100644 --- a/thrust/iterator/discard_iterator.h +++ b/thrust/iterator/discard_iterator.h @@ -44,7 +44,7 @@ THRUST_NAMESPACE_BEGIN * \p discard_iterator may also be used to count the size of an algorithm's output which * may not be known a priori. 
* - * The following code snippet demonstrates how to use \p discard_iterator to ignore + * The following code snippet demonstrates how to use \p discard_iterator to * ignore one of the output ranges of reduce_by_key * * \code From 93b72cd8b9c6e53f1ac8067a83440736521d5c1f Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 12:03:23 +0000 Subject: [PATCH 08/44] Replace deprecated _VSTD macro with std --- thrust/type_traits/is_contiguous_iterator.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/thrust/type_traits/is_contiguous_iterator.h b/thrust/type_traits/is_contiguous_iterator.h index 437b9079b..c5b9e2be8 100644 --- a/thrust/type_traits/is_contiguous_iterator.h +++ b/thrust/type_traits/is_contiguous_iterator.h @@ -139,7 +139,11 @@ struct is_libcxx_wrap_iter : false_type {}; #if defined(_LIBCPP_VERSION) template struct is_libcxx_wrap_iter< +# if _LIBCPP_VERSION < 14000 || THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP _VSTD::__wrap_iter +# else + std::__wrap_iter +# endif > : true_type {}; #endif From f3e2676803732c50bbc373507e4620a6dc3c1049 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 12:06:29 +0000 Subject: [PATCH 09/44] Update mode example to use thrust::unique_count --- examples/mode.cu | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/examples/mode.cu b/examples/mode.cu index 2069adec1..ef3ab38de 100644 --- a/examples/mode.cu +++ b/examples/mode.cu @@ -2,12 +2,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include @@ -31,7 +31,7 @@ int main(void) // transfer data to device thrust::device_vector d_data(h_data); - + // print the initial data std::cout << "initial data" << std::endl; thrust::copy(d_data.begin(), d_data.end(), std::ostream_iterator(std::cout, " ")); @@ -39,18 +39,14 @@ int main(void) // sort data to bring equal elements together thrust::sort(d_data.begin(), d_data.end()); - + // 
print the sorted data std::cout << "sorted data" << std::endl; thrust::copy(d_data.begin(), d_data.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; // count number of unique keys - size_t num_unique = thrust::inner_product(d_data.begin(), d_data.end() - 1, - d_data.begin() + 1, - 0, - thrust::plus(), - thrust::not_equal_to()) + 1; + size_t num_unique = thrust::unique_count(d_data.begin(), d_data.end()); // count multiplicity of each key thrust::device_vector d_output_keys(num_unique); @@ -59,7 +55,7 @@ int main(void) thrust::constant_iterator(1), d_output_keys.begin(), d_output_counts.begin()); - + // print the counts std::cout << "values" << std::endl; thrust::copy(d_output_keys.begin(), d_output_keys.end(), std::ostream_iterator(std::cout, " ")); @@ -76,9 +72,9 @@ int main(void) int mode = d_output_keys[mode_iter - d_output_counts.begin()]; int occurances = *mode_iter; - + std::cout << "Modal value " << mode << " occurs " << occurances << " times " << std::endl; - + return 0; } From 44d7369182b66301c65f724867e2aa103874ffc4 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 12:22:55 +0000 Subject: [PATCH 10/44] Ensure that thrust fancy iterators are trivially_copy_constructible when possible --- testing/constant_iterator.cu | 18 +++++----- testing/counting_iterator.cu | 34 +++++++++--------- testing/discard_iterator.cu | 18 +++++----- testing/permutation_iterator.cu | 38 +++++++++++---------- testing/reverse_iterator.cu | 4 ++- testing/zip_iterator.cu | 7 ++-- thrust/detail/type_traits.h | 3 +- thrust/iterator/constant_iterator.h | 34 +++++++----------- thrust/iterator/counting_iterator.h | 29 +++++----------- thrust/iterator/detail/reverse_iterator.inl | 27 --------------- thrust/iterator/detail/zip_iterator.inl | 23 ------------- thrust/iterator/discard_iterator.h | 12 ------- thrust/iterator/permutation_iterator.h | 21 ++++++------ thrust/iterator/reverse_iterator.h | 32 ++++++++--------- 
thrust/iterator/zip_iterator.h | 21 ++++++------ 15 files changed, 123 insertions(+), 198 deletions(-) diff --git a/testing/constant_iterator.cu b/testing/constant_iterator.cu index e42cfea8d..f7a4a2e31 100644 --- a/testing/constant_iterator.cu +++ b/testing/constant_iterator.cu @@ -30,21 +30,23 @@ void TestConstantIteratorIncrement(void) lhs++; ASSERT_EQUAL(1, lhs - rhs); - + lhs++; lhs++; - + ASSERT_EQUAL(3, lhs - rhs); lhs += 5; - + ASSERT_EQUAL(8, lhs - rhs); lhs -= 10; - + ASSERT_EQUAL(-2, lhs - rhs); } DECLARE_UNITTEST(TestConstantIteratorIncrement); +static_assert(std::is_trivially_copy_constructible>::value, ""); +static_assert(std::is_trivially_copyable>::value, ""); void TestConstantIteratorIncrementBig(void) { @@ -68,15 +70,15 @@ void TestConstantIteratorComparison(void) ASSERT_EQUAL(true, iter1 == iter2); iter1++; - + ASSERT_EQUAL(1, iter1 - iter2); ASSERT_EQUAL(false, iter1 == iter2); - + iter2++; ASSERT_EQUAL(0, iter1 - iter2); ASSERT_EQUAL(true, iter1 == iter2); - + iter1 += 100; iter2 += 100; @@ -146,7 +148,7 @@ void TestConstantIteratorTransform(void) ASSERT_EQUAL(-7, result[1]); ASSERT_EQUAL(-7, result[2]); ASSERT_EQUAL(-7, result[3]); - + thrust::transform(first1, last1, first2, result.begin(), thrust::plus()); ASSERT_EQUAL(10, result[0]); diff --git a/testing/counting_iterator.cu b/testing/counting_iterator.cu index ebefe4d64..05a6839d6 100644 --- a/testing/counting_iterator.cu +++ b/testing/counting_iterator.cu @@ -33,6 +33,8 @@ void TestCountingIteratorCopyConstructor(void) ASSERT_EQUAL(*iter0, *d_iter); } DECLARE_UNITTEST(TestCountingIteratorCopyConstructor); +static_assert(std::is_trivially_copy_constructible>::value, ""); +static_assert(std::is_trivially_copyable>::value, ""); void TestCountingIteratorIncrement(void) @@ -44,18 +46,18 @@ void TestCountingIteratorIncrement(void) iter++; ASSERT_EQUAL(*iter, 1); - + iter++; iter++; - + ASSERT_EQUAL(*iter, 3); iter += 5; - + ASSERT_EQUAL(*iter, 8); iter -= 10; - + ASSERT_EQUAL(*iter, -2); } 
DECLARE_UNITTEST(TestCountingIteratorIncrement); @@ -70,15 +72,15 @@ void TestCountingIteratorComparison(void) ASSERT_EQUAL(iter1 == iter2, true); iter1++; - + ASSERT_EQUAL(iter1 - iter2, 1); ASSERT_EQUAL(iter1 == iter2, false); - + iter2++; ASSERT_EQUAL(iter1 - iter2, 0); ASSERT_EQUAL(iter1 == iter2, true); - + iter1 += 100; iter2 += 100; @@ -99,19 +101,19 @@ void TestCountingIteratorFloatComparison(void) ASSERT_EQUAL(iter2 < iter1, false); iter1++; - + ASSERT_EQUAL(iter1 - iter2, 1); ASSERT_EQUAL(iter1 == iter2, false); - ASSERT_EQUAL(iter2 < iter1, true); - ASSERT_EQUAL(iter1 < iter2, false); - + ASSERT_EQUAL(iter2 < iter1, true); + ASSERT_EQUAL(iter1 < iter2, false); + iter2++; ASSERT_EQUAL(iter1 - iter2, 0); ASSERT_EQUAL(iter1 == iter2, true); ASSERT_EQUAL(iter1 < iter2, false); ASSERT_EQUAL(iter2 < iter1, false); - + iter1 += 100; iter2 += 100; @@ -130,12 +132,12 @@ void TestCountingIteratorFloatComparison(void) ASSERT_EQUAL(iter4 < iter3, false); iter3++; // iter3 = 1.0, iter4 = 0.5 - + ASSERT_EQUAL(iter3 - iter4, 0); ASSERT_EQUAL(iter3 == iter4, true); ASSERT_EQUAL(iter3 < iter4, false); ASSERT_EQUAL(iter4 < iter3, false); - + iter4++; // iter3 = 1.0, iter4 = 1.5 ASSERT_EQUAL(iter3 - iter4, 0); @@ -162,9 +164,9 @@ void TestCountingIteratorDistance(void) ASSERT_EQUAL(thrust::distance(iter1, iter2), 5); iter1++; - + ASSERT_EQUAL(thrust::distance(iter1, iter2), 4); - + iter2 += 100; ASSERT_EQUAL(thrust::distance(iter1, iter2), 104); diff --git a/testing/discard_iterator.cu b/testing/discard_iterator.cu index f5933559d..4f04f7437 100644 --- a/testing/discard_iterator.cu +++ b/testing/discard_iterator.cu @@ -11,21 +11,23 @@ void TestDiscardIteratorIncrement(void) lhs++; ASSERT_EQUAL(1, lhs - rhs); - + lhs++; lhs++; - + ASSERT_EQUAL(3, lhs - rhs); lhs += 5; - + ASSERT_EQUAL(8, lhs - rhs); lhs -= 10; - + ASSERT_EQUAL(-2, lhs - rhs); } DECLARE_UNITTEST(TestDiscardIteratorIncrement); +static_assert(std::is_trivially_copy_constructible>::value, ""); 
+static_assert(std::is_trivially_copyable>::value, ""); void TestDiscardIteratorComparison(void) { @@ -36,15 +38,15 @@ void TestDiscardIteratorComparison(void) ASSERT_EQUAL(true, iter1 == iter2); iter1++; - + ASSERT_EQUAL(1, iter1 - iter2); ASSERT_EQUAL(false, iter1 == iter2); - + iter2++; ASSERT_EQUAL(0, iter1 - iter2); ASSERT_EQUAL(true, iter1 == iter2); - + iter1 += 100; iter2 += 100; @@ -82,7 +84,7 @@ void TestZippedDiscardIterator(void) { ; } - + ASSERT_EQUAL(10, thrust::get<0>(z_iter1_first.get_iterator_tuple()) - thrust::make_discard_iterator()); typedef tuple > IteratorTuple2; diff --git a/testing/permutation_iterator.cu b/testing/permutation_iterator.cu index 22fef650c..d319bf096 100644 --- a/testing/permutation_iterator.cu +++ b/testing/permutation_iterator.cu @@ -14,7 +14,7 @@ void TestPermutationIteratorSimple(void) Vector source(8); Vector indices(4); - + // initialize input thrust::sequence(source.begin(), source.end(), 1); @@ -22,7 +22,7 @@ void TestPermutationIteratorSimple(void) indices[1] = 0; indices[2] = 5; indices[3] = 7; - + thrust::permutation_iterator begin(source.begin(), indices.begin()); thrust::permutation_iterator end(source.begin(), indices.end()); @@ -53,6 +53,8 @@ void TestPermutationIteratorSimple(void) ASSERT_EQUAL(source[7], 8); } DECLARE_INTEGRAL_VECTOR_UNITTEST(TestPermutationIteratorSimple); +static_assert(std::is_trivially_copy_constructible>::value, ""); +static_assert(std::is_trivially_copyable>::value, ""); template void TestPermutationIteratorGather(void) @@ -62,7 +64,7 @@ void TestPermutationIteratorGather(void) Vector source(8); Vector indices(4); Vector output(4, 10); - + // initialize input thrust::sequence(source.begin(), source.end(), 1); @@ -70,7 +72,7 @@ void TestPermutationIteratorGather(void) indices[1] = 0; indices[2] = 5; indices[3] = 7; - + thrust::permutation_iterator p_source(source.begin(), indices.begin()); thrust::copy(p_source, p_source + 4, output.begin()); @@ -90,7 +92,7 @@ void 
TestPermutationIteratorScatter(void) Vector source(4, 10); Vector indices(4); Vector output(8); - + // initialize output thrust::sequence(output.begin(), output.end(), 1); @@ -98,7 +100,7 @@ void TestPermutationIteratorScatter(void) indices[1] = 0; indices[2] = 5; indices[3] = 7; - + // construct transform_iterator thrust::permutation_iterator p_output(output.begin(), indices.begin()); @@ -121,7 +123,7 @@ void TestMakePermutationIterator(void) Vector source(8); Vector indices(4); Vector output(4, 10); - + // initialize input thrust::sequence(source.begin(), source.end(), 1); @@ -129,7 +131,7 @@ void TestMakePermutationIterator(void) indices[1] = 0; indices[2] = 5; indices[3] = 7; - + thrust::copy(thrust::make_permutation_iterator(source.begin(), indices.begin()), thrust::make_permutation_iterator(source.begin(), indices.begin()) + 4, output.begin()); @@ -150,7 +152,7 @@ void TestPermutationIteratorReduce(void) Vector source(8); Vector indices(4); Vector output(4, 10); - + // initialize input thrust::sequence(source.begin(), source.end(), 1); @@ -158,7 +160,7 @@ void TestPermutationIteratorReduce(void) indices[1] = 0; indices[2] = 5; indices[3] = 7; - + // construct transform_iterator thrust::permutation_iterator iter(source.begin(), indices.begin()); @@ -166,7 +168,7 @@ void TestPermutationIteratorReduce(void) thrust::make_permutation_iterator(source.begin(), indices.begin()) + 4); ASSERT_EQUAL(result1, 19); - + T result2 = thrust::transform_reduce(thrust::make_permutation_iterator(source.begin(), indices.begin()), thrust::make_permutation_iterator(source.begin(), indices.begin()) + 4, thrust::negate(), @@ -187,7 +189,7 @@ void TestPermutationIteratorHostDeviceGather(void) HostVector h_source(8); HostVector h_indices(4); HostVector h_output(4, 10); - + DeviceVector d_source(8); DeviceVector d_indices(4); DeviceVector d_output(4, 10); @@ -200,7 +202,7 @@ void TestPermutationIteratorHostDeviceGather(void) h_indices[1] = d_indices[1] = 0; h_indices[2] = d_indices[2] = 
5; h_indices[3] = d_indices[3] = 7; - + thrust::permutation_iterator p_h_source(h_source.begin(), h_indices.begin()); thrust::permutation_iterator p_d_source(d_source.begin(), d_indices.begin()); @@ -211,7 +213,7 @@ void TestPermutationIteratorHostDeviceGather(void) ASSERT_EQUAL(d_output[1], 1); ASSERT_EQUAL(d_output[2], 6); ASSERT_EQUAL(d_output[3], 8); - + // gather device->host thrust::copy(p_d_source, p_d_source + 4, h_output.begin()); @@ -233,7 +235,7 @@ void TestPermutationIteratorHostDeviceScatter(void) HostVector h_source(4,10); HostVector h_indices(4); HostVector h_output(8); - + DeviceVector d_source(4,10); DeviceVector d_indices(4); DeviceVector d_output(8); @@ -246,7 +248,7 @@ void TestPermutationIteratorHostDeviceScatter(void) h_indices[1] = d_indices[1] = 0; h_indices[2] = d_indices[2] = 5; h_indices[3] = d_indices[3] = 7; - + thrust::permutation_iterator p_h_output(h_output.begin(), h_indices.begin()); thrust::permutation_iterator p_d_output(d_output.begin(), d_indices.begin()); @@ -261,7 +263,7 @@ void TestPermutationIteratorHostDeviceScatter(void) ASSERT_EQUAL(d_output[5], 10); ASSERT_EQUAL(d_output[6], 7); ASSERT_EQUAL(d_output[7], 10); - + // scatter device->host thrust::copy(d_source.begin(), d_source.end(), p_h_output); @@ -281,7 +283,7 @@ void TestPermutationIteratorWithCountingIterator(void) { using T = typename Vector::value_type; using diff_t = typename thrust::counting_iterator::difference_type; - + thrust::counting_iterator input(0), index(0); // test copy() diff --git a/testing/reverse_iterator.cu b/testing/reverse_iterator.cu index 1571456f1..9d7170bd8 100644 --- a/testing/reverse_iterator.cu +++ b/testing/reverse_iterator.cu @@ -23,6 +23,8 @@ void TestReverseIteratorCopyConstructor(void) ASSERT_EQUAL(*d_iter2, *d_iter3); } DECLARE_UNITTEST(TestReverseIteratorCopyConstructor); +static_assert(std::is_trivially_copy_constructible>::value, ""); +static_assert(std::is_trivially_copyable>::value, ""); void TestReverseIteratorIncrement(void) 
{ @@ -71,7 +73,7 @@ void TestReverseIteratorCopy(void) source[3] = 40; Vector destination(4,0); - + thrust::copy(thrust::make_reverse_iterator(source.end()), thrust::make_reverse_iterator(source.begin()), destination.begin()); diff --git a/testing/zip_iterator.cu b/testing/zip_iterator.cu index b2493531c..9d103df0f 100644 --- a/testing/zip_iterator.cu +++ b/testing/zip_iterator.cu @@ -90,6 +90,7 @@ template } }; SimpleUnitTest > TestZipIteratorManipulationInstance; +static_assert(std::is_trivially_copy_constructible>>::value, ""); template struct TestZipIteratorReference @@ -224,7 +225,7 @@ template //ASSERT_EQUAL(true, (detail::is_convertible::value) ); - + #if 0 // test host/any typedef tuple IteratorTuple4; @@ -346,8 +347,8 @@ struct TestZipIteratorTransform d_result.begin(), SumTwoTuple()); ASSERT_EQUAL(h_result, d_result); - - + + // Tuples with 3 elements transform( make_zip_iterator(make_tuple(h_data0.begin(), h_data1.begin(), h_data2.begin())), make_zip_iterator(make_tuple(h_data0.end(), h_data1.end(), h_data2.end())), diff --git a/thrust/detail/type_traits.h b/thrust/detail/type_traits.h index 2eac468cd..9e981fee6 100644 --- a/thrust/detail/type_traits.h +++ b/thrust/detail/type_traits.h @@ -515,6 +515,8 @@ template : enable_if< is_convertible::value, T > {}; +template +using enable_if_convertible_t = typename enable_if_convertible::type; template struct disable_if_convertible @@ -527,7 +529,6 @@ template : enable_if::value, Result> {}; - template struct is_numeric : and_< diff --git a/thrust/iterator/constant_iterator.h b/thrust/iterator/constant_iterator.h index 747d75972..df3a869d1 100644 --- a/thrust/iterator/constant_iterator.h +++ b/thrust/iterator/constant_iterator.h @@ -112,35 +112,25 @@ template - THRUST_HOST_DEVICE - constant_iterator(constant_iterator const &rhs, - typename thrust::detail::enable_if_convertible< - typename thrust::iterator_system >::type, - typename thrust::iterator_system::type - >::type * = 0) - : super_t(rhs.base()), 
m_value(rhs.value()) {} + template >::type, + typename thrust::iterator_system::type, + int> = 0> + THRUST_HOST_DEVICE constant_iterator(constant_iterator const& rhs) + : super_t(rhs.base()) + , m_value(rhs.value()) + {} /*! This constructor receives a value to use as the constant value of this * \p constant_iterator and an index specifying the location of this diff --git a/thrust/iterator/counting_iterator.h b/thrust/iterator/counting_iterator.h index 01f6a98c9..71f838b73 100644 --- a/thrust/iterator/counting_iterator.h +++ b/thrust/iterator/counting_iterator.h @@ -150,27 +150,20 @@ template - THRUST_HOST_DEVICE - counting_iterator(counting_iterator const &rhs, - typename thrust::detail::enable_if_convertible< - typename thrust::iterator_system >::type, - typename thrust::iterator_system::type - >::type * = 0) - : super_t(rhs.base()){} + template < + class OtherSystem, + detail::enable_if_convertible_t< + typename thrust::iterator_system>::type, + typename thrust::iterator_system::type, + int> = 0> + THRUST_HOST_DEVICE counting_iterator(counting_iterator const& rhs) + : super_t(rhs.base()) + {} /*! This \c explicit constructor copies the value of an \c Incrementable * into a new \p counting_iterator's \c Incrementable counter. @@ -181,10 +174,6 @@ template= 2011 - counting_iterator & operator=(const counting_iterator &) = default; -#endif - /*! 
\cond */ private: diff --git a/thrust/iterator/detail/reverse_iterator.inl b/thrust/iterator/detail/reverse_iterator.inl index e27235439..6ef26a897 100644 --- a/thrust/iterator/detail/reverse_iterator.inl +++ b/thrust/iterator/detail/reverse_iterator.inl @@ -36,33 +36,6 @@ template } // end detail -template - THRUST_HOST_DEVICE - reverse_iterator - ::reverse_iterator(BidirectionalIterator x) - :super_t(x) -{ -} // end reverse_iterator::reverse_iterator() - -template - template - THRUST_HOST_DEVICE - reverse_iterator - ::reverse_iterator(reverse_iterator const &r -// XXX msvc screws this up -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if< - thrust::detail::is_convertible< - OtherBidirectionalIterator, - BidirectionalIterator - >::value - >::type * -#endif // MSVC - ) - :super_t(r.base()) -{ -} // end reverse_iterator::reverse_iterator() - template THRUST_HOST_DEVICE typename reverse_iterator::super_t::reference diff --git a/thrust/iterator/detail/zip_iterator.inl b/thrust/iterator/detail/zip_iterator.inl index 36a0ebf7c..53cf6a183 100644 --- a/thrust/iterator/detail/zip_iterator.inl +++ b/thrust/iterator/detail/zip_iterator.inl @@ -24,14 +24,6 @@ THRUST_NAMESPACE_BEGIN -template -THRUST_HOST_DEVICE - zip_iterator - ::zip_iterator() -{ -} // end zip_iterator::zip_iterator() - - template THRUST_HOST_DEVICE zip_iterator @@ -40,21 +32,6 @@ THRUST_HOST_DEVICE { } // end zip_iterator::zip_iterator() - -template - template - THRUST_HOST_DEVICE - zip_iterator - ::zip_iterator(const zip_iterator &other, - typename thrust::detail::enable_if_convertible< - OtherIteratorTuple, - IteratorTuple - >::type *) - :m_iterator_tuple(other.get_iterator_tuple()) -{ -} // end zip_iterator::zip_iterator() - - template THRUST_HOST_DEVICE const IteratorTuple &zip_iterator diff --git a/thrust/iterator/discard_iterator.h b/thrust/iterator/discard_iterator.h index 8d071e9bb..29e9e28d8 100644 --- a/thrust/iterator/discard_iterator.h +++ 
b/thrust/iterator/discard_iterator.h @@ -107,18 +107,6 @@ template /*! \endcond */ - /*! Copy constructor copies from a source discard_iterator. - * - * \p rhs The discard_iterator to copy. - */ - THRUST_HOST_DEVICE - discard_iterator(discard_iterator const &rhs) - : super_t(rhs.base()) {} - -#if THRUST_CPP_DIALECT >= 2011 - discard_iterator & operator=(const discard_iterator &) = default; -#endif - /*! This constructor receives an optional index specifying the position of this * \p discard_iterator in a range. * diff --git a/thrust/iterator/permutation_iterator.h b/thrust/iterator/permutation_iterator.h index e0a1a9526..ed5c4bc90 100644 --- a/thrust/iterator/permutation_iterator.h +++ b/thrust/iterator/permutation_iterator.h @@ -149,20 +149,19 @@ template - THRUST_HOST_DEVICE - permutation_iterator(permutation_iterator const &r - // XXX remove these guards when we have static_assert - , typename detail::enable_if_convertible::type* = 0 - , typename detail::enable_if_convertible::type* = 0 - ) - : super_t(r.base()), m_element_iterator(r.m_element_iterator) + template = 0, + detail::enable_if_convertible_t = 0> + THRUST_HOST_DEVICE permutation_iterator(permutation_iterator const& rhs) + : super_t(rhs.base()) + , m_element_iterator(rhs.m_element_iterator) {} - /*! \cond - */ + /*! \cond + */ private: // MSVC 2013 and 2015 incorrectly warning about returning a reference to // a local/temporary here. diff --git a/thrust/iterator/reverse_iterator.h b/thrust/iterator/reverse_iterator.h index a642379c2..94531b600 100644 --- a/thrust/iterator/reverse_iterator.h +++ b/thrust/iterator/reverse_iterator.h @@ -158,8 +158,11 @@ template public: /*! Default constructor does nothing. */ - THRUST_HOST_DEVICE - reverse_iterator() {} +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC && THRUST_MSVC_VERSION < 1920 + THRUST_HOST_DEVICE reverse_iterator() {} +#else + reverse_iterator() = default; +#endif /*! 
\p Constructor accepts a \c BidirectionalIterator pointing to a range * for this \p reverse_iterator to reverse. @@ -167,27 +170,20 @@ template * \param x A \c BidirectionalIterator pointing to a range to reverse. */ THRUST_HOST_DEVICE - explicit reverse_iterator(BidirectionalIterator x); + explicit reverse_iterator(BidirectionalIterator x) + : super_t(x) + {} /*! \p Copy constructor allows construction from a related compatible * \p reverse_iterator. * - * \param r A \p reverse_iterator to copy from. + * \param rhs A \p reverse_iterator to copy from. */ - template - THRUST_HOST_DEVICE - reverse_iterator(reverse_iterator const &r -// XXX msvc screws this up -// XXX remove these guards when we have static_assert -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if< - thrust::detail::is_convertible< - OtherBidirectionalIterator, - BidirectionalIterator - >::value - >::type * = 0 -#endif // MSVC - ); + template = 0> + THRUST_HOST_DEVICE reverse_iterator(reverse_iterator const& rhs) + : super_t(rhs.base()) + {} /*! \cond */ diff --git a/thrust/iterator/zip_iterator.h b/thrust/iterator/zip_iterator.h index a718cd4c4..e6718082f 100644 --- a/thrust/iterator/zip_iterator.h +++ b/thrust/iterator/zip_iterator.h @@ -140,10 +140,13 @@ template : public detail::zip_iterator_base::type { public: - /*! Null constructor does nothing. + /*! Default constructor does nothing. */ - inline THRUST_HOST_DEVICE - zip_iterator(); +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC && THRUST_MSVC_VERSION < 1920 + inline THRUST_HOST_DEVICE zip_iterator() {} +#else + zip_iterator() = default; +#endif /*! This constructor creates a new \p zip_iterator from a * \p tuple of iterators. @@ -158,13 +161,11 @@ template * * \param other The \p zip_iterator to copy. 
*/ - template - inline THRUST_HOST_DEVICE - zip_iterator(const zip_iterator &other, - typename thrust::detail::enable_if_convertible< - OtherIteratorTuple, - IteratorTuple - >::type * = 0); + template = 0> + inline THRUST_HOST_DEVICE zip_iterator(const zip_iterator& other) + : m_iterator_tuple(other.get_iterator_tuple()) + {} /*! This method returns a \c const reference to this \p zip_iterator's * \p tuple of iterators. From a32a67ce5033d9e9a1475373b7c7738a2220e23f Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Tue, 30 Jul 2024 12:31:04 +0000 Subject: [PATCH 11/44] Use checked allocators in CUB catch2 tests --- thrust/detail/vector_base.h | 25 ++++++++++++------------ thrust/detail/vector_base.inl | 36 ++++++++++++----------------------- thrust/mr/new.h | 16 ++++++++++------ 3 files changed, 34 insertions(+), 43 deletions(-) diff --git a/thrust/detail/vector_base.h b/thrust/detail/vector_base.h index 4e1cffea9..141cd4c20 100644 --- a/thrust/detail/vector_base.h +++ b/thrust/detail/vector_base.h @@ -564,8 +564,6 @@ template storage_type &new_storage); }; // end vector_base -} // end detail - /*! This function assigns the contents of vector a to vector b and the * contents of vector b to vector a. * @@ -575,8 +573,8 @@ template * of a will be returned here. */ template - void swap(detail::vector_base &a, - detail::vector_base &b); + void swap(vector_base &a, + vector_base &b); /*! This operator allows comparison between two vectors. @@ -587,18 +585,18 @@ template */ template -bool operator==(const detail::vector_base& lhs, - const detail::vector_base& rhs); +bool operator==(const vector_base& lhs, + const vector_base& rhs); template -bool operator==(const detail::vector_base& lhs, +bool operator==(const vector_base& lhs, const std::vector& rhs); template bool operator==(const std::vector& lhs, - const detail::vector_base& rhs); + const vector_base& rhs); /*! This operator allows comparison between two vectors. 
* \param lhs The first \p vector to compare. @@ -608,20 +606,21 @@ bool operator==(const std::vector& lhs, */ template -bool operator!=(const detail::vector_base& lhs, - const detail::vector_base& rhs); +bool operator!=(const vector_base& lhs, + const vector_base& rhs); template -bool operator!=(const detail::vector_base& lhs, +bool operator!=(const vector_base& lhs, const std::vector& rhs); template bool operator!=(const std::vector& lhs, - const detail::vector_base& rhs); + const vector_base& rhs); + +} // end detail THRUST_NAMESPACE_END #include - diff --git a/thrust/detail/vector_base.inl b/thrust/detail/vector_base.inl index c75e6fd95..6b3774682 100644 --- a/thrust/detail/vector_base.inl +++ b/thrust/detail/vector_base.inl @@ -1254,21 +1254,13 @@ template } // end catch } // end vector_base::allocate_and_copy() - -} // end detail - template - void swap(detail::vector_base &a, - detail::vector_base &b) + void swap(vector_base &a, + vector_base &b) { a.swap(b); } // end swap() - - -namespace detail -{ - // iterator tags match template bool vector_equal(InputIterator1 first1, InputIterator1 last1, @@ -1312,22 +1304,17 @@ bool vector_equal(InputIterator1 first1, InputIterator1 last1, thrust::detail::is_same()); } -} // end namespace detail - - - - template -bool operator==(const detail::vector_base& lhs, - const detail::vector_base& rhs) +bool operator==(const vector_base& lhs, + const vector_base& rhs) { return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); } template -bool operator==(const detail::vector_base& lhs, +bool operator==(const vector_base& lhs, const std::vector& rhs) { return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); @@ -1336,22 +1323,22 @@ bool operator==(const detail::vector_base& lhs, template bool operator==(const std::vector& lhs, - const detail::vector_base& rhs) + const vector_base& rhs) { return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), 
lhs.end(), rhs.begin()); } template -bool operator!=(const detail::vector_base& lhs, - const detail::vector_base& rhs) +bool operator!=(const vector_base& lhs, + const vector_base& rhs) { return !(lhs == rhs); } template -bool operator!=(const detail::vector_base& lhs, +bool operator!=(const vector_base& lhs, const std::vector& rhs) { return !(lhs == rhs); @@ -1360,10 +1347,11 @@ bool operator!=(const detail::vector_base& lhs, template bool operator!=(const std::vector& lhs, - const detail::vector_base& rhs) + const vector_base& rhs) { return !(lhs == rhs); } -THRUST_NAMESPACE_END +} // end namespace detail +THRUST_NAMESPACE_END diff --git a/thrust/mr/new.h b/thrust/mr/new.h index 6c2b3f4ca..40c0532e3 100644 --- a/thrust/mr/new.h +++ b/thrust/mr/new.h @@ -1,6 +1,6 @@ /* * Copyright 2018 NVIDIA Corporation - * Modifications Copyright 2023 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright 2023-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,10 +34,8 @@ namespace mr * \{ */ -/*! A memory resource that uses global operators new and delete to allocate and deallocate memory. Uses alignment-enabled - * overloads when available, otherwise uses regular overloads and implements alignment requirements by itself. - */ -class new_delete_resource final : public memory_resource<> +#if !THRUST_DOXYGEN +class new_delete_resource_base : public memory_resource<> { public: /*! Allocates memory of size at least \p bytes and alignment at least \p alignment. @@ -96,10 +94,16 @@ class new_delete_resource final : public memory_resource<> #endif } }; +#endif // !THRUST_DOXYGEN + +/*! A memory resource that uses global operators new and delete to allocate and deallocate memory. Uses alignment-enabled + * overloads when available, otherwise uses regular overloads and implements alignment requirements by itself. 
+ */ +class new_delete_resource final : public new_delete_resource_base +{}; /*! \} // memory_resources */ } // end mr THRUST_NAMESPACE_END - From b7410172ad40f2aa8c215b7851a43ce55e9cb4e0 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 13:59:29 +0000 Subject: [PATCH 12/44] Refactors thrust::copy_if to use cub::DeviceSelect --- testing/cuda/copy_if.cu | 92 ++ .../system/cuda/detail/adjacent_difference.h | 1 - thrust/system/cuda/detail/copy_if.h | 956 ++++-------------- 3 files changed, 284 insertions(+), 765 deletions(-) diff --git a/testing/cuda/copy_if.cu b/testing/cuda/copy_if.cu index bb879b671..751eca2fc 100644 --- a/testing/cuda/copy_if.cu +++ b/testing/cuda/copy_if.cu @@ -1,3 +1,4 @@ +#include "thrust/iterator/transform_iterator.h" #include #include #include @@ -18,6 +19,25 @@ struct mod_3 unsigned int operator()(T x) { return static_cast(x) % 3; } }; +template +struct mod_n +{ + T mod; + __host__ __device__ bool operator()(T x) + { + return (x % mod == 0) ? 
true : false; + } +}; + +template +struct multiply_n +{ + T multiplier; + __host__ __device__ T operator()(T x) + { + return x * multiplier; + } +}; #ifdef THRUST_TEST_DEVICE_SIDE template @@ -284,3 +304,75 @@ void TestCopyIfStencilCudaStreamsNoSync() } DECLARE_UNITTEST(TestCopyIfStencilCudaStreamsNoSync); +void TestCopyIfWithMagnitude(int magnitude) +{ + using offset_t = std::size_t; + + // Prepare input + offset_t num_items = offset_t{1ull} << magnitude; + thrust::counting_iterator begin(offset_t{0}); + auto end = begin + num_items; + ASSERT_EQUAL(static_cast(thrust::distance(begin, end)), num_items); + + // Run algorithm on large number of items + offset_t match_every_nth = 1000000; + offset_t expected_num_copied = (num_items + match_every_nth - 1) / match_every_nth; + thrust::device_vector copied_out(expected_num_copied); + auto selected_out_end = thrust::copy_if(begin, end, copied_out.begin(), mod_n{match_every_nth}); + + // Ensure number of selected items are correct + offset_t num_selected_out = static_cast(thrust::distance(copied_out.begin(), selected_out_end)); + ASSERT_EQUAL(num_selected_out, expected_num_copied); + copied_out.resize(expected_num_copied); + + // Ensure selected items are correct + auto expected_out_it = thrust::make_transform_iterator(begin, multiply_n{match_every_nth}); + bool all_results_correct = thrust::equal(copied_out.begin(), copied_out.end(), expected_out_it); + ASSERT_EQUAL(all_results_correct, true); +} + +void TestCopyIfWithLargeNumberOfItems() +{ + TestCopyIfWithMagnitude(30); + TestCopyIfWithMagnitude(31); + TestCopyIfWithMagnitude(32); + TestCopyIfWithMagnitude(33); +} +DECLARE_UNITTEST(TestCopyIfWithLargeNumberOfItems); + +void TestCopyIfStencilWithMagnitude(int magnitude) +{ + using offset_t = std::size_t; + + // Prepare input + offset_t num_items = offset_t{1ull} << magnitude; + thrust::counting_iterator begin(offset_t{0}); + auto end = begin + num_items; + thrust::counting_iterator stencil(offset_t{0}); + 
ASSERT_EQUAL(static_cast(thrust::distance(begin, end)), num_items); + + // Run algorithm on large number of items + offset_t match_every_nth = 1000000; + offset_t expected_num_copied = (num_items + match_every_nth - 1) / match_every_nth; + thrust::device_vector copied_out(expected_num_copied); + auto selected_out_end = thrust::copy_if(begin, end, stencil, copied_out.begin(), mod_n{match_every_nth}); + + // Ensure number of selected items are correct + offset_t num_selected_out = static_cast(thrust::distance(copied_out.begin(), selected_out_end)); + ASSERT_EQUAL(num_selected_out, expected_num_copied); + copied_out.resize(expected_num_copied); + + // Ensure selected items are correct + auto expected_out_it = thrust::make_transform_iterator(begin, multiply_n{match_every_nth}); + bool all_results_correct = thrust::equal(copied_out.begin(), copied_out.end(), expected_out_it); + ASSERT_EQUAL(all_results_correct, true); +} + +void TestCopyIfStencilWithLargeNumberOfItems() +{ + TestCopyIfStencilWithMagnitude(30); + TestCopyIfStencilWithMagnitude(31); + TestCopyIfStencilWithMagnitude(32); + TestCopyIfStencilWithMagnitude(33); +} +DECLARE_UNITTEST(TestCopyIfStencilWithLargeNumberOfItems); diff --git a/thrust/system/cuda/detail/adjacent_difference.h b/thrust/system/cuda/detail/adjacent_difference.h index 8e3403aab..9b939d580 100644 --- a/thrust/system/cuda/detail/adjacent_difference.h +++ b/thrust/system/cuda/detail/adjacent_difference.h @@ -44,7 +44,6 @@ #include #include -#include #include THRUST_NAMESPACE_BEGIN diff --git a/thrust/system/cuda/detail/copy_if.h b/thrust/system/cuda/detail/copy_if.h index 997793640..ec7f75cd9 100644 --- a/thrust/system/cuda/detail/copy_if.h +++ b/thrust/system/cuda/detail/copy_if.h @@ -30,802 +30,230 @@ #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include +# include +# include +# include +# include + +# 
include +# include +# include +# include +# include +# include +# include +# include +# include +# include THRUST_NAMESPACE_BEGIN // XXX declare generic copy_if interface // to avoid circulular dependency from thrust/copy.h template -_CCCL_HOST_DEVICE - OutputIterator - copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - -template -_CCCL_HOST_DEVICE - OutputIterator - copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -namespace cuda_cub { - -namespace __copy_if { - - template - struct PtxPolicy - { - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD, - }; - static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const cub::BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; - }; // struct PtxPolicy - - template - struct Tuning; - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 9, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_LDG, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning<350> - - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); +_CCCL_HOST_DEVICE OutputIterator copy_if( + const thrust::detail::execution_policy_base& exec, + InputIterator first, + InputIterator last, + OutputIterator result, + Predicate pred); + +template +_CCCL_HOST_DEVICE OutputIterator copy_if( + const thrust::detail::execution_policy_base& exec, + InputIterator1 first, + InputIterator1 last, + InputIterator2 stencil, + 
OutputIterator result, + Predicate pred); + +namespace cuda_cub +{ - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 10, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_LDG, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning<350> - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); +namespace detail +{ - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_DEFAULT, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning<300> - - struct no_stencil_tag_ {}; - typedef no_stencil_tag_* no_stencil_tag; - template - struct CopyIfAgent +template +struct DispatchCopyIf +{ + static cudaError_t THRUST_RUNTIME_FUNCTION dispatch( + execution_policy& policy, + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIt first, + StencilIt stencil, + OutputIt output, + Predicate predicate, + OffsetT num_items, + OutputIt& output_end) { - typedef typename iterator_traits::value_type item_type; - typedef typename iterator_traits::value_type stencil_type; - - typedef cub::ScanTileState ScanTileState; + using num_selected_out_it_t = OffsetT*; + using equality_op_t = cub::NullType; + + cudaError_t status = cudaSuccess; + cudaStream_t stream = cuda_cub::stream(policy); + + std::size_t allocation_sizes[2] = {0, sizeof(OffsetT)}; + void* allocations[2] = {nullptr, nullptr}; + + // drop rejected items (i.e., this is not a partition, but a selection) + constexpr bool keep_rejects = false; + constexpr bool may_alias = false; + + // Query algorithm memory requirements + status = cub::DispatchSelectIf< + InputIt, + StencilIt, + OutputIt, + num_selected_out_it_t, + Predicate, + equality_op_t, + OffsetT, + 
keep_rejects, + may_alias>::Dispatch(nullptr, + allocation_sizes[0], + first, + stencil, + output, + static_cast(nullptr), + predicate, + equality_op_t{}, + num_items, + stream); + CUDA_CUB_RET_IF_FAIL(status); - template - struct PtxPlan : Tuning::type - { - typedef Tuning tuning; - - typedef typename core::LoadIterator::type ItemsLoadIt; - typedef typename core::LoadIterator::type StencilLoadIt; - - typedef typename core::BlockLoad::type BlockLoadItems; - typedef typename core::BlockLoad::type BlockLoadStencil; - - typedef cub::TilePrefixCallbackOp - TilePrefixCallback; - - typedef cub::BlockScan - BlockScan; - - - union TempStorage - { - struct ScanStorage - { - typename BlockScan::TempStorage scan; - typename TilePrefixCallback::TempStorage prefix; - } scan_storage; - - typename BlockLoadItems::TempStorage load_items; - typename BlockLoadStencil::TempStorage load_stencil; - - core::uninitialized_array raw_exchange; - }; // union TempStorage - }; // struct PtxPlan - - typedef typename core::specialize_plan_msvc10_war::type::type ptx_plan; - - typedef typename ptx_plan::ItemsLoadIt ItemsLoadIt; - typedef typename ptx_plan::StencilLoadIt StencilLoadIt; - typedef typename ptx_plan::BlockLoadItems BlockLoadItems; - typedef typename ptx_plan::BlockLoadStencil BlockLoadStencil; - typedef typename ptx_plan::TilePrefixCallback TilePrefixCallback; - typedef typename ptx_plan::BlockScan BlockScan; - typedef typename ptx_plan::TempStorage TempStorage; - - enum - { - USE_STENCIL = !thrust::detail::is_same::value, - BLOCK_THREADS = ptx_plan::BLOCK_THREADS, - ITEMS_PER_THREAD = ptx_plan::ITEMS_PER_THREAD, - ITEMS_PER_TILE = ptx_plan::ITEMS_PER_TILE - }; + status = cub::AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes); + CUDA_CUB_RET_IF_FAIL(status); - struct impl - { - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - 
TempStorage & storage; - ScanTileState &tile_state; - ItemsLoadIt items_load_it; - StencilLoadIt stencil_load_it; - OutputIt output_it; - Predicate predicate; - Size num_items; - - //------------------------------------------ - // scatter results to memory - //------------------------------------------ - - THRUST_DEVICE_FUNCTION void - scatter(item_type (&items)[ITEMS_PER_THREAD], - Size (&selection_flags)[ITEMS_PER_THREAD], - Size (&selection_indices)[ITEMS_PER_THREAD], - int num_tile_selections, - Size num_selections_prefix) - { - using core::sync_threadblock; - -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - int local_scatter_offset = selection_indices[ITEM] - - num_selections_prefix; - if (selection_flags[ITEM]) - { - new (&storage.raw_exchange[local_scatter_offset]) item_type(items[ITEM]); - } - } - - sync_threadblock(); - - for (int item = threadIdx.x; - item < num_tile_selections; - item += BLOCK_THREADS) - { - output_it[num_selections_prefix + item] = storage.raw_exchange[item]; - } - } // func scatter - - //------------------------------------------ - // specialize predicate on different types - //------------------------------------------ - - template - struct __tag {}; - - enum ItemStencil - { - ITEM, - STENCIL - }; - - template - struct wrap_value - { - T const & x; - THRUST_DEVICE_FUNCTION wrap_value(T const &x) : x(x) {} - - THRUST_DEVICE_FUNCTION T const &operator()() const { return x; }; - }; // struct wrap_type - - //------- item - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &x, - __tag) - { - return predicate(x()); - } - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - //-------- stencil - - template - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &x, - __tag) - { - return predicate(x()); - } - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - - 
THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - template - THRUST_DEVICE_FUNCTION void - compute_selection_flags(int num_tile_items, - T (&values)[ITEMS_PER_THREAD], - Size (&selection_flags)[ITEMS_PER_THREAD]) - { -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Out-of-bounds items are selection_flags - selection_flags[ITEM] = 1; - - if (!IS_LAST_TILE || - (Size(threadIdx.x * ITEMS_PER_THREAD) + ITEM < num_tile_items)) - { - selection_flags[ITEM] = - predicate_wrapper(wrap_value(values[ITEM]), - __tag()); - } - } - } - - //------------------------------------------ - // consume tiles - //------------------------------------------ - - template - Size THRUST_DEVICE_FUNCTION - consume_tile_impl(int num_tile_items, - int tile_idx, - Size tile_base) - { - item_type items_loc[ITEMS_PER_THREAD]; - Size selection_flags[ITEMS_PER_THREAD]; - Size selection_idx[ITEMS_PER_THREAD]; - - if (IS_LAST_TILE) { - BlockLoadItems(storage.load_items) - .Load(items_load_it + tile_base, - items_loc, - num_tile_items); - } - else - { - BlockLoadItems(storage.load_items) - .Load(items_load_it + tile_base, - items_loc); - } - - core::sync_threadblock(); - - if (USE_STENCIL) - { - stencil_type stencil_loc[ITEMS_PER_THREAD]; - - if (IS_LAST_TILE) - { - BlockLoadStencil(storage.load_stencil) - .Load(stencil_load_it + tile_base, - stencil_loc, - num_tile_items); - } - else - { - BlockLoadStencil(storage.load_stencil) - .Load(stencil_load_it + tile_base, - stencil_loc); - } - - compute_selection_flags(num_tile_items, - stencil_loc, - selection_flags); - } - else /* Use predicate on items rather then stencil */ - { - compute_selection_flags(num_tile_items, - items_loc, - selection_flags); - } - - core::sync_threadblock(); - - Size num_tile_selections = 0; - Size num_selections = 0; - Size num_selections_prefix = 0; - if (IS_FIRST_TILE) - { - BlockScan(storage.scan_storage.scan) - .ExclusiveSum(selection_flags, 
- selection_idx, - num_tile_selections); - - if (threadIdx.x == 0) - { - // Update tile status if this is not the last tile - if (!IS_LAST_TILE) - tile_state.SetInclusive(0, num_tile_selections); - } - - // Do not count any out-of-bounds selections - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - } - num_selections = num_tile_selections; - } - else - { - TilePrefixCallback prefix_cb(tile_state, - storage.scan_storage.prefix, - cub::Sum(), - tile_idx); - BlockScan(storage.scan_storage.scan) - .ExclusiveSum(selection_flags, - selection_idx, - prefix_cb); - - num_selections = prefix_cb.GetInclusivePrefix(); - num_tile_selections = prefix_cb.GetBlockAggregate(); - num_selections_prefix = prefix_cb.GetExclusivePrefix(); - - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - num_selections -= num_discount; - } - } - - core::sync_threadblock(); - - scatter(items_loc, - selection_flags, - selection_idx, - num_tile_selections, - num_selections_prefix); - - - return num_selections; - } // func consume_tile_impl - - template - THRUST_DEVICE_FUNCTION Size - consume_tile(int num_tile_items, - int tile_idx, - Size tile_base) - { - if (tile_idx == 0) - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - else - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - } // func consume_tile - - //--------------------------------------------------------------------- - // Constructor - //--------------------------------------------------------------------- - - THRUST_DEVICE_FUNCTION impl(TempStorage & storage_, - ScanTileState & tile_state_, - ItemsIt items_it, - StencilIt stencil_it, - OutputIt output_it_, - Predicate predicate_, - Size num_items_, - int num_tiles, - NumSelectedOutputIt num_selected_out) - : storage(storage_), - tile_state(tile_state_), - items_load_it(core::make_load_iterator(ptx_plan(), 
items_it)), - stencil_load_it(core::make_load_iterator(ptx_plan(), stencil_it)), - output_it(output_it_), - predicate(predicate_), - num_items(num_items_) - { - int tile_idx = blockIdx.x; - Size tile_base = tile_idx * ITEMS_PER_TILE; - - if (tile_idx < num_tiles - 1) - { - consume_tile(ITEMS_PER_TILE, - tile_idx, - tile_base); - } - else - { - int num_remaining = static_cast(num_items - tile_base); - Size num_selections = consume_tile(num_remaining, - tile_idx, - tile_base); - if (threadIdx.x == 0) - { - *num_selected_out = num_selections; - } - } - } // ctor impl - }; - - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- - - THRUST_AGENT_ENTRY(ItemsIt items_it, - StencilIt stencil_it, - OutputIt output_it, - Predicate predicate, - Size num_items, - NumSelectedOutputIt num_selected_out, - ScanTileState tile_state, - int num_tiles, - char * shmem) + // Return if we're only querying temporary storage requirements + if (d_temp_storage == nullptr) { - TempStorage &storage = *reinterpret_cast(shmem); - - impl(storage, - tile_state, - items_it, - stencil_it, - output_it, - predicate, - num_items, - num_tiles, - num_selected_out); + return status; } - }; // struct CopyIfAgent - template - struct InitAgent - { - template - struct PtxPlan : PtxPolicy<128> {}; - typedef core::specialize_plan ptx_plan; - - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- - - THRUST_AGENT_ENTRY(ScanTileState tile_state, - Size num_tiles, - NumSelectedIt num_selected_out, - char * /*shmem*/) - { - tile_state.InitializeStatus(num_tiles); - if (blockIdx.x == 0 && threadIdx.x == 0) - *num_selected_out = 0; - } - }; // struct InitAgent - - template - THRUST_RUNTIME_FUNCTION - static cudaError_t doit_step(void * d_temp_storage, - size_t & temp_storage_bytes, - 
ItemsIt items, - StencilIt stencil, - OutputIt output_it, - Predicate predicate, - NumSelectedOutIt num_selected_out, - Size num_items, - cudaStream_t stream) - { + // Return for empty problems if (num_items == 0) - return cudaSuccess; - - using core::AgentLauncher; - using core::AgentPlan; - using core::get_agent_plan; - - typedef AgentLauncher< - CopyIfAgent > - copy_if_agent; - - typedef typename copy_if_agent::ScanTileState ScanTileState; - - typedef AgentLauncher< - InitAgent > - init_agent; - - - using core::get_plan; - typename get_plan::type init_plan = init_agent::get_plan(); - typename get_plan::type copy_if_plan = copy_if_agent::get_plan(stream); - - int tile_size = copy_if_plan.items_per_tile; - size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size); - - size_t vshmem_size = core::vshmem_size(copy_if_plan.shared_memory_size, - num_tiles); - - cudaError_t status = cudaSuccess; - if (num_items == 0) - return status; - - size_t allocation_sizes[2] = {0, vshmem_size}; - status = ScanTileState::AllocationSize(static_cast(num_tiles), allocation_sizes[0]); - CUDA_CUB_RET_IF_FAIL(status); - - - void* allocations[2] = {NULL, NULL}; - status = cub::AliasTemporaries(d_temp_storage, - temp_storage_bytes, - allocations, - allocation_sizes); - CUDA_CUB_RET_IF_FAIL(status); - - - if (d_temp_storage == NULL) { + output_end = output; return status; } - ScanTileState tile_status; - status = tile_status.Init(static_cast(num_tiles), allocations[0], allocation_sizes[0]); + // Memory allocation for the number of selected output items + OffsetT* d_num_selected_out = thrust::detail::aligned_reinterpret_cast(allocations[1]); + + // Run algorithm + status = cub::DispatchSelectIf< + InputIt, + StencilIt, + OutputIt, + num_selected_out_it_t, + Predicate, + equality_op_t, + OffsetT, + keep_rejects, + may_alias>::Dispatch(allocations[0], + allocation_sizes[0], + first, + stencil, + output, + d_num_selected_out, + predicate, + equality_op_t{}, + num_items, + stream); 
CUDA_CUB_RET_IF_FAIL(status); - init_agent ia(init_plan, num_tiles, stream, "copy_if::init_agent"); - - char *vshmem_ptr = vshmem_size > 0 ? (char*)allocations[1] : NULL; - - copy_if_agent pa(copy_if_plan, num_items, stream, vshmem_ptr, "copy_if::partition_agent"); - - ia.launch(tile_status, num_tiles, num_selected_out); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); - - pa.launch(items, - stencil, - output_it, - predicate, - num_items, - num_selected_out, - tile_status, - num_tiles); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); - return status; - } - - template - THRUST_RUNTIME_FUNCTION - OutputIt copy_if(execution_policy& policy, - InputIt first, - InputIt last, - StencilIt stencil, - OutputIt output, - Predicate predicate) - { - typedef int size_type; - - size_type num_items = static_cast(thrust::distance(first, last)); - size_t temp_storage_bytes = 0; - cudaStream_t stream = cuda_cub::stream(policy); - - if (num_items == 0) - return output; - - cudaError_t status; - status = doit_step(NULL, - temp_storage_bytes, - first, - stencil, - output, - predicate, - reinterpret_cast(NULL), - num_items, - stream); - cuda_cub::throw_on_error(status, "copy_if failed on 1st step"); - - size_t allocation_sizes[2] = {sizeof(size_type), temp_storage_bytes}; - void * allocations[2] = {NULL, NULL}; - - size_t storage_size = 0; - - status = core::alias_storage(NULL, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "copy_if failed on 1st alias_storage"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array - tmp(policy, storage_size); - void *ptr = static_cast(tmp.data().get()); - - status = core::alias_storage(ptr, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "copy_if failed on 2nd alias_storage"); - - size_type* d_num_selected_out - = thrust::detail::aligned_reinterpret_cast(allocations[0]); - - status = doit_step(allocations[1], - temp_storage_bytes, - first, - stencil, - output, - predicate, - d_num_selected_out, - num_items, - stream); - cuda_cub::throw_on_error(status, "copy_if failed on 2nd step"); - + // Get number of selected items status = cuda_cub::synchronize(policy); - cuda_cub::throw_on_error(status, "copy_if failed to synchronize"); - - size_type num_selected = get_value(policy, d_num_selected_out); + CUDA_CUB_RET_IF_FAIL(status); + OffsetT num_selected = get_value(policy, d_num_selected_out); - return output + num_selected; + output_end = output + num_selected; + return status; } - -} // namespace __copy_if +}; + +template +THRUST_RUNTIME_FUNCTION OutputIt copy_if( + execution_policy& policy, + InputIt first, + InputIt last, + StencilIt stencil, + OutputIt output, + Predicate predicate) +{ + using size_type = typename iterator_traits::difference_type; + + size_type num_items = static_cast(thrust::distance(first, last)); + OutputIt output_end{}; + cudaError_t status = cudaSuccess; + size_t temp_storage_bytes = 0; + + // 32-bit offset-type dispatch + using dispatch32_t = DispatchCopyIf; + + // 64-bit offset-type dispatch + using dispatch64_t = DispatchCopyIf; + + // Query temporary storage requirements + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, nullptr, temp_storage_bytes, first, stencil, output, predicate, num_items_fixed, output_end)); + cuda_cub::throw_on_error(status, "copy_if failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp(policy, temp_storage_bytes); + void* temp_storage = static_cast(tmp.data().get()); + + // Run algorithm + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, temp_storage, temp_storage_bytes, first, stencil, output, predicate, num_items_fixed, output_end)); + cuda_cub::throw_on_error(status, "copy_if failed on 1st step"); + + return output_end; +} + +} // namespace detail //------------------------- // Thrust API entry points //------------------------- - _CCCL_EXEC_CHECK_DISABLE -template -OutputIterator _CCCL_HOST_DEVICE -copy_if(execution_policy &policy, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) +template +OutputIterator _CCCL_HOST_DEVICE copy_if( + execution_policy& policy, InputIterator first, InputIterator last, OutputIterator result, Predicate pred) { - THRUST_CDP_DISPATCH((return __copy_if::copy_if(policy, - first, - last, - __copy_if::no_stencil_tag(), - result, - pred);), - (return - thrust::copy_if(cvt_to_seq(derived_cast(policy)), - first, - last, - result, - pred);)); -} // func copy_if + THRUST_CDP_DISPATCH( + (return detail::copy_if(policy, first, last, static_cast(nullptr), result, pred);), + (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, result, pred);)); +} _CCCL_EXEC_CHECK_DISABLE -template -OutputIterator _CCCL_HOST_DEVICE -copy_if(execution_policy &policy, - InputIterator first, - InputIterator last, - StencilIterator stencil, - OutputIterator result, - Predicate pred) +template +OutputIterator _CCCL_HOST_DEVICE copy_if( + execution_policy& policy, + InputIterator first, + InputIterator last, + StencilIterator stencil, + OutputIterator result, + Predicate pred) { - THRUST_CDP_DISPATCH( - (return __copy_if::copy_if(policy, first, last, stencil, result, pred);), - (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), - first, - last, - stencil, - result, - pred);)); -} // 
func copy_if - -} // namespace cuda_cub + THRUST_CDP_DISPATCH((return detail::copy_if(policy, first, last, stencil, result, pred);), + (return thrust::copy_if(cvt_to_seq(derived_cast(policy)), first, last, stencil, result, pred);)); +} + +} // namespace cuda_cub THRUST_NAMESPACE_END -#include +# include #endif From 158fa5332ff0147ff22c4eb7aea4c64c711e09a3 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 14:13:01 +0000 Subject: [PATCH 13/44] Refactor thrust::[stable_]partition[_copy] to use cub::DevicePartition --- testing/cuda/partition.cu | 101 ++- thrust/system/cuda/detail/copy_if.h | 2 +- thrust/system/cuda/detail/partition.h | 1192 ++++++------------------- 3 files changed, 353 insertions(+), 942 deletions(-) diff --git a/testing/cuda/partition.cu b/testing/cuda/partition.cu index f8701db6f..4069682e5 100644 --- a/testing/cuda/partition.cu +++ b/testing/cuda/partition.cu @@ -1,8 +1,10 @@ -#include -#include #include #include +#include +#include +#include "thrust/detail/raw_pointer_cast.h" +#include template struct is_even @@ -12,6 +14,27 @@ struct is_even }; +template +struct mod_n +{ + T mod; + bool negate; + __host__ __device__ bool operator()(T x) + { + return (x % mod == 0) ? 
(!negate) : negate; + } +}; + +template +struct multiply_n +{ + T multiplier; + __host__ __device__ T operator()(T x) + { + return x * multiplier; + } +}; + #ifdef THRUST_TEST_DEVICE_SIDE template __global__ @@ -553,12 +576,84 @@ void TestStablePartitionCopyStencilDeviceDevice() } DECLARE_UNITTEST(TestStablePartitionCopyStencilDeviceDevice); - void TestStablePartitionCopyStencilDeviceNoSync() { TestStablePartitionCopyStencilDevice(thrust::cuda::par_nosync); } DECLARE_UNITTEST(TestStablePartitionCopyStencilDeviceNoSync); + +void TestPartitionIfWithMagnitude(int magnitude) +{ + using offset_t = std::size_t; + + // Prepare input + offset_t num_items = offset_t{1ull} << magnitude; + thrust::counting_iterator begin(offset_t{0}); + auto end = begin + num_items; + thrust::counting_iterator stencil(offset_t{0}); + ASSERT_EQUAL(static_cast(thrust::distance(begin, end)), num_items); + + // Run algorithm on large number of items + offset_t match_every_nth = 1000000; + offset_t expected_num_written = (num_items + match_every_nth - 1) / match_every_nth; + + // Tests input is correctly dereferenced for large offsets and selected items are correctly written + { + // Initialize input + thrust::device_vector partitioned_out(expected_num_written); + + // Run test + constexpr bool negate_matches = false; + auto select_op = mod_n{match_every_nth, negate_matches}; + auto partitioned_out_ends = + thrust::stable_partition_copy(begin, end, partitioned_out.begin(), thrust::make_discard_iterator(), select_op); + const auto selected_out_end = partitioned_out_ends.first; + + // Ensure number of selected items are correct + const offset_t num_selected_out = + static_cast(thrust::distance(partitioned_out.begin(), selected_out_end)); + ASSERT_EQUAL(num_selected_out, expected_num_written); + partitioned_out.resize(expected_num_written); + + // Ensure selected items are correct + auto expected_out_it = thrust::make_transform_iterator(begin, multiply_n{match_every_nth}); + bool all_results_correct 
= thrust::equal(partitioned_out.begin(), partitioned_out.end(), expected_out_it); + ASSERT_EQUAL(all_results_correct, true); + } + + // Tests input is correctly dereferenced for large offsets and rejected items are correctly written + { + // Initialize input + thrust::device_vector partitioned_out(expected_num_written); + + // Run test + constexpr bool negate_matches = true; + auto select_op = mod_n{match_every_nth, negate_matches}; + const auto partitioned_out_ends = + thrust::stable_partition_copy(begin, end, thrust::make_discard_iterator(), partitioned_out.begin(), select_op); + const auto rejected_out_end = partitioned_out_ends.second; + + // Ensure number of rejected items are correct + const offset_t num_rejected_out = + static_cast(thrust::distance(partitioned_out.begin(), rejected_out_end)); + ASSERT_EQUAL(num_rejected_out, expected_num_written); + partitioned_out.resize(expected_num_written); + + // Ensure rejected items are correct + auto expected_out_it = thrust::make_transform_iterator(begin, multiply_n{match_every_nth}); + bool all_results_correct = thrust::equal(partitioned_out.begin(), partitioned_out.end(), expected_out_it); + ASSERT_EQUAL(all_results_correct, true); + } +} + +void TestPartitionIfWithLargeNumberOfItems() +{ + TestPartitionIfWithMagnitude(30); + TestPartitionIfWithMagnitude(31); + TestPartitionIfWithMagnitude(32); + TestPartitionIfWithMagnitude(33); +} +DECLARE_UNITTEST(TestPartitionIfWithLargeNumberOfItems); #endif diff --git a/thrust/system/cuda/detail/copy_if.h b/thrust/system/cuda/detail/copy_if.h index ec7f75cd9..f6c8a3221 100644 --- a/thrust/system/cuda/detail/copy_if.h +++ b/thrust/system/cuda/detail/copy_if.h @@ -218,7 +218,7 @@ THRUST_RUNTIME_FUNCTION OutputIt copy_if( dispatch64_t::dispatch, num_items, (policy, temp_storage, temp_storage_bytes, first, stencil, output, predicate, num_items_fixed, output_end)); - cuda_cub::throw_on_error(status, "copy_if failed on 1st step"); + cuda_cub::throw_on_error(status, "copy_if failed 
on 2nd step"); return output_end; } diff --git a/thrust/system/cuda/detail/partition.h b/thrust/system/cuda/detail/partition.h index 213a11799..6989406da 100644 --- a/thrust/system/cuda/detail/partition.h +++ b/thrust/system/cuda/detail/partition.h @@ -30,1041 +30,357 @@ #if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // cub::ScanTileState -#include -#include -#include -#include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include THRUST_NAMESPACE_BEGIN namespace cuda_cub { -namespace __partition { - - template - struct PtxPolicy - { - enum - { - BLOCK_THREADS = _BLOCK_THREADS, - ITEMS_PER_THREAD = _ITEMS_PER_THREAD, - ITEMS_PER_TILE = _BLOCK_THREADS * _ITEMS_PER_THREAD - }; - static const cub::BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; - static const cub::CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; - static const cub::BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; - }; // struct PtxPolicy - - template - struct Tuning; - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 10, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_LDG, - cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning<350> - - template - struct Tuning - { - const static int INPUT_SIZE = sizeof(T); - - enum - { - NOMINAL_4B_ITEMS_PER_THREAD = 7, - ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), - }; - - typedef PtxPolicy<128, - ITEMS_PER_THREAD, - cub::BLOCK_LOAD_WARP_TRANSPOSE, - cub::LOAD_DEFAULT, - 
cub::BLOCK_SCAN_WARP_SCANS> - type; - }; // Tuning<300> - - template - struct __tag{}; - - - struct no_stencil_tag_ {}; - struct single_output_tag_ - { - template - THRUST_DEVICE_FUNCTION T const& operator=(T const& t) const { return t; } - }; - - typedef no_stencil_tag_* no_stencil_tag; - typedef single_output_tag_* single_output_tag;; - - template - struct PartitionAgent - { - typedef typename iterator_traits::value_type item_type; - typedef typename iterator_traits::value_type stencil_type; - - - typedef cub::ScanTileState ScanTileState; - - template - struct PtxPlan : Tuning::type - { - typedef Tuning tuning; - - typedef typename core::LoadIterator::type ItemsLoadIt; - typedef typename core::LoadIterator::type StencilLoadIt; - - typedef typename core::BlockLoad::type BlockLoadItems; - typedef typename core::BlockLoad::type BlockLoadStencil; - - typedef cub::TilePrefixCallbackOp - TilePrefixCallback; - typedef cub::BlockScan - BlockScan; - - - union TempStorage - { - struct ScanStorage - { - typename BlockScan::TempStorage scan; - typename TilePrefixCallback::TempStorage prefix; - } scan_storage; - - typename BlockLoadItems::TempStorage load_items; - typename BlockLoadStencil::TempStorage load_stencil; - - core::uninitialized_array raw_exchange; - }; // union TempStorage - }; // struct PtxPlan - typedef typename core::specialize_plan_msvc10_war::type::type ptx_plan; - - typedef typename ptx_plan::ItemsLoadIt ItemsLoadIt; - typedef typename ptx_plan::StencilLoadIt StencilLoadIt; - typedef typename ptx_plan::BlockLoadItems BlockLoadItems; - typedef typename ptx_plan::BlockLoadStencil BlockLoadStencil; - typedef typename ptx_plan::TilePrefixCallback TilePrefixCallback; - typedef typename ptx_plan::BlockScan BlockScan; - typedef typename ptx_plan::TempStorage TempStorage; - - enum - { - SINGLE_OUTPUT = thrust::detail::is_same::value, - USE_STENCIL = !thrust::detail::is_same::value, - BLOCK_THREADS = ptx_plan::BLOCK_THREADS, - ITEMS_PER_THREAD = 
ptx_plan::ITEMS_PER_THREAD, - ITEMS_PER_TILE = ptx_plan::ITEMS_PER_TILE - }; - - - struct impl - { - //--------------------------------------------------------------------- - // Per-thread fields - //--------------------------------------------------------------------- - - TempStorage & temp_storage; - ScanTileState &tile_state; - ItemsLoadIt items_glob; - StencilLoadIt stencil_glob; - SelectedOutIt selected_out_glob; - RejectedOutIt rejected_out_glob; - Predicate predicate; - Size num_items; - - //--------------------------------------------------------------------- - // Utilities - //--------------------------------------------------------------------- - - template - THRUST_DEVICE_FUNCTION void - scatter(item_type (&items)[ITEMS_PER_THREAD], - Size (&selection_flags)[ITEMS_PER_THREAD], - Size (&selection_indices)[ITEMS_PER_THREAD], - int num_tile_items, - int num_tile_selections, - Size num_selections_prefix, - Size num_rejected_prefix, - Size /*num_selections*/) - { - int tile_num_rejections = num_tile_items - num_tile_selections; - - // Scatter items to shared memory (rejections first) -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM; - int local_selection_idx = selection_indices[ITEM] - num_selections_prefix; - int local_rejection_idx = item_idx - local_selection_idx; - int local_scatter_offset = (selection_flags[ITEM]) - ? tile_num_rejections + local_selection_idx - : local_rejection_idx; - - temp_storage.raw_exchange[local_scatter_offset] = items[ITEM]; - } - - core::sync_threadblock(); - - // Gather items from shared memory and scatter to global -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - int item_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; - int rejection_idx = item_idx; - int selection_idx = item_idx - tile_num_rejections; - Size scatter_offset = (item_idx < tile_num_rejections) - ? 
num_items - - num_rejected_prefix - rejection_idx - 1 - : num_selections_prefix + selection_idx; - - item_type item = temp_storage.raw_exchange[item_idx]; - - if (!IS_LAST_TILE || (item_idx < num_tile_items)) - { - if (SINGLE_OUTPUT || item_idx >= tile_num_rejections) - { - selected_out_glob[scatter_offset] = item; - } - else // if !SINGLE_OUTPUT, scatter rejected items separately - { - rejected_out_glob[num_items - scatter_offset - 1] = item; - } - } - } - } // func scatter - - //------------------------------------------ - // specialize predicate on different types - //------------------------------------------ - - enum ItemStencil - { - ITEM, - STENCIL - }; - - template - struct wrap_value - { - T const & x; - THRUST_DEVICE_FUNCTION wrap_value(T const &x) : x(x) {} - - THRUST_DEVICE_FUNCTION T const &operator()() const { return x; }; - }; // struct wrap_type - - //------- item - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &x, - __tag) - { - return predicate(x()); - } - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - //-------- stencil - - template - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &x, - __tag) - { - return predicate(x()); - } - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - - THRUST_DEVICE_FUNCTION bool - predicate_wrapper(wrap_value const &, - __tag) - { - return false; - } - - template - THRUST_DEVICE_FUNCTION void - compute_selection_flags(int num_tile_items, - T (&values)[ITEMS_PER_THREAD], - Size (&selection_flags)[ITEMS_PER_THREAD]) - { -#pragma unroll - for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) - { - // Out-of-bounds items are selection_flags - selection_flags[ITEM] = 1; - - if (!IS_LAST_TILE || - (Size(threadIdx.x * ITEMS_PER_THREAD) + ITEM < num_tile_items)) - { - selection_flags[ITEM] = - predicate_wrapper(wrap_value(values[ITEM]), - __tag()); - } - } - } - - 
//--------------------------------------------------------------------- - // Tile processing - //--------------------------------------------------------------------- - - template - Size THRUST_DEVICE_FUNCTION - consume_tile_impl(int num_tile_items, - int tile_idx, - Size tile_base) - { - item_type items_loc[ITEMS_PER_THREAD]; - Size selection_flags[ITEMS_PER_THREAD]; - Size selection_idx[ITEMS_PER_THREAD]; - - if (IS_LAST_TILE) - { - BlockLoadItems(temp_storage.load_items) - .Load(items_glob + tile_base, items_loc, num_tile_items); - } - else - { - BlockLoadItems(temp_storage.load_items) - .Load(items_glob + tile_base, items_loc); - } - - core::sync_threadblock(); - - if (USE_STENCIL) - { - stencil_type stencil_loc[ITEMS_PER_THREAD]; - - if (IS_LAST_TILE) - { - BlockLoadStencil(temp_storage.load_stencil) - .Load(stencil_glob + tile_base, stencil_loc, num_tile_items); - } - else - { - BlockLoadStencil(temp_storage.load_stencil) - .Load(stencil_glob + tile_base, stencil_loc); - } - - compute_selection_flags(num_tile_items, - stencil_loc, - selection_flags); - } - else /* Use predicate on items rather then stencil */ - { - compute_selection_flags(num_tile_items, - items_loc, - selection_flags); - } - - core::sync_threadblock(); - - Size num_tile_selections = 0; - Size num_selections = 0; - Size num_selections_prefix = 0; - Size num_rejected_prefix = 0; - if (IS_FIRST_TILE) - { - BlockScan(temp_storage.scan_storage.scan) - .ExclusiveSum(selection_flags, - selection_idx, - num_tile_selections); - - if (threadIdx.x == 0) - { - // Update tile status if this is not the last tile - if (!IS_LAST_TILE) - tile_state.SetInclusive(0, num_tile_selections); - } - - // Do not count any out-of-bounds selections - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - } - num_selections = num_tile_selections; - } - else - { - TilePrefixCallback prefix_cb(tile_state, - temp_storage.scan_storage.prefix, - cub::Sum(), - 
tile_idx); - BlockScan(temp_storage.scan_storage.scan) - .ExclusiveSum(selection_flags, - selection_idx, - prefix_cb); - - num_selections = prefix_cb.GetInclusivePrefix(); - num_tile_selections = prefix_cb.GetBlockAggregate(); - num_selections_prefix = prefix_cb.GetExclusivePrefix(); - num_rejected_prefix = tile_base - num_selections_prefix; - - if (IS_LAST_TILE) - { - int num_discount = ITEMS_PER_TILE - num_tile_items; - num_tile_selections -= num_discount; - num_selections -= num_discount; - } - } - - core::sync_threadblock(); - - scatter(items_loc, - selection_flags, - selection_idx, - num_tile_items, - num_tile_selections, - num_selections_prefix, - num_rejected_prefix, - num_selections); - - - return num_selections; - } - - - template - THRUST_DEVICE_FUNCTION Size - consume_tile(int num_tile_items, - int tile_idx, - Size tile_base) - { - if (tile_idx == 0) - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - else - { - return consume_tile_impl(num_tile_items, - tile_idx, - tile_base); - } - } - - //--------------------------------------------------------------------- - // Constructor - //--------------------------------------------------------------------- - - THRUST_DEVICE_FUNCTION - impl(TempStorage & temp_storage_, - ScanTileState & tile_state_, - ItemsLoadIt items_glob_, - StencilLoadIt stencil_glob_, - SelectedOutIt selected_out_glob_, - RejectedOutIt rejected_out_glob_, - Predicate predicate_, - Size num_items_, - int num_tiles, - NumSelectedOutIt num_selected_out) - : temp_storage(temp_storage_), - tile_state(tile_state_), - items_glob(items_glob_), - stencil_glob(stencil_glob_), - selected_out_glob(selected_out_glob_), - rejected_out_glob(rejected_out_glob_), - predicate(predicate_), - num_items(num_items_) - { - int tile_idx = blockIdx.x; - Size tile_base = tile_idx * ITEMS_PER_TILE; - - if (tile_idx < num_tiles - 1) - { - consume_tile(ITEMS_PER_TILE, - tile_idx, - tile_base); - } - else - { - int num_remaining = 
static_cast(num_items - tile_base); - Size num_selections = consume_tile(num_remaining, - tile_idx, - tile_base); - if (threadIdx.x == 0) - { - *num_selected_out = num_selections; - } - } - } // - }; //struct impl - - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- - - THRUST_AGENT_ENTRY(ItemsIt items, - StencilIt stencil, - SelectedOutIt selected_out, - RejectedOutIt rejected_out, - Predicate predicate, - Size num_items, - NumSelectedOutIt num_selected_out, - ScanTileState tile_state, - int num_tiles, - char * shmem) - { - TempStorage &storage = *reinterpret_cast(shmem); - - impl(storage, - tile_state, - core::make_load_iterator(ptx_plan(), items), - core::make_load_iterator(ptx_plan(), stencil), - selected_out, - rejected_out, - predicate, - num_items, - num_tiles, - num_selected_out); - } - }; // struct PartitionAgent +namespace detail +{ - template - struct InitAgent +template +struct DispatchPartitionIf +{ + static cudaError_t THRUST_RUNTIME_FUNCTION dispatch( + execution_policy& policy, + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIt first, + StencilIt stencil, + OutputIt output, + Predicate predicate, + OffsetT num_items, + std::size_t& num_selected) { - template - struct PtxPlan : PtxPolicy<128> {}; - - - typedef core::specialize_plan ptx_plan; + using num_selected_out_it_t = OffsetT*; + using equality_op_t = cub::NullType; + + cudaError_t status = cudaSuccess; + cudaStream_t stream = cuda_cub::stream(policy); + + std::size_t allocation_sizes[2] = {0, sizeof(OffsetT)}; + void* allocations[2] = {nullptr, nullptr}; + + // Partitioning algorithm keeps "rejected" items + constexpr bool keep_rejects = true; + constexpr bool may_alias = false; + + // Query algorithm memory requirements + status = cub::DispatchSelectIf< + InputIt, + StencilIt, + OutputIt, + num_selected_out_it_t, + Predicate, + equality_op_t, + OffsetT, + 
keep_rejects, + may_alias>::Dispatch(nullptr, + allocation_sizes[0], + first, + stencil, + output, + static_cast(nullptr), + predicate, + equality_op_t{}, + num_items, + stream); + CUDA_CUB_RET_IF_FAIL(status); - //--------------------------------------------------------------------- - // Agent entry point - //--------------------------------------------------------------------- + status = cub::AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes); + CUDA_CUB_RET_IF_FAIL(status); - THRUST_AGENT_ENTRY(ScanTileState tile_state, - Size num_tiles, - NumSelectedIt num_selected_out, - char * /*shmem*/) + // Return if we're only querying temporary storage requirements + if (d_temp_storage == nullptr) { - tile_state.InitializeStatus(num_tiles); - if (blockIdx.x == 0 && threadIdx.x == 0) - *num_selected_out = 0; + return status; } - }; // struct InitAgent - - template - static cudaError_t THRUST_RUNTIME_FUNCTION - doit_step(void * d_temp_storage, - size_t & temp_storage_bytes, - ItemsIt items, - StencilIt stencil, - SelectedOutIt selected_out, - RejectedOutIt rejected_out, - Predicate predicate, - NumSelectedOutIt num_selected_out, - Size num_items, - cudaStream_t stream) - { - using core::AgentLauncher; - using core::AgentPlan; - using core::get_agent_plan; - - typedef AgentLauncher< - PartitionAgent > - partition_agent; - - typedef typename partition_agent::ScanTileState ScanTileState; - - typedef AgentLauncher< - InitAgent > - init_agent; - - - using core::get_plan; - typename get_plan::type init_plan = init_agent::get_plan(); - typename get_plan::type partition_plan = partition_agent::get_plan(stream); - - int tile_size = partition_plan.items_per_tile; - size_t num_tiles = cub::DivideAndRoundUp(num_items, tile_size); - - size_t vshmem_storage = core::vshmem_size(partition_plan.shared_memory_size, - num_tiles); - - cudaError_t status = cudaSuccess; + // Return for empty problems if (num_items == 0) - return status; - - size_t 
allocation_sizes[2] = {0, vshmem_storage}; - status = ScanTileState::AllocationSize(static_cast(num_tiles), allocation_sizes[0]); - CUDA_CUB_RET_IF_FAIL(status); - - - void* allocations[2] = {NULL, NULL}; - status = cub::AliasTemporaries(d_temp_storage, - temp_storage_bytes, - allocations, - allocation_sizes); - CUDA_CUB_RET_IF_FAIL(status); - - if (d_temp_storage == NULL) { + num_selected = 0; return status; } - ScanTileState tile_status; - status = tile_status.Init(static_cast(num_tiles), allocations[0], allocation_sizes[0]); + // Memory allocation for the number of selected output items + OffsetT* d_num_selected_out = thrust::detail::aligned_reinterpret_cast(allocations[1]); + + // Run algorithm + status = cub::DispatchSelectIf< + InputIt, + StencilIt, + OutputIt, + num_selected_out_it_t, + Predicate, + equality_op_t, + OffsetT, + keep_rejects, + may_alias>::Dispatch(allocations[0], + allocation_sizes[0], + first, + stencil, + output, + d_num_selected_out, + predicate, + equality_op_t{}, + num_items, + stream); CUDA_CUB_RET_IF_FAIL(status); - init_agent ia(init_plan, num_tiles, stream, "partition::init_agent"); - - char *vshmem_ptr = vshmem_storage > 0 ? 
(char *)allocations[1] : NULL; - - partition_agent pa(partition_plan, num_items, stream, vshmem_ptr, "partition::partition_agent"); - - ia.launch(tile_status, num_tiles, num_selected_out); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); + // Get number of selected items + status = cuda_cub::synchronize(policy); + CUDA_CUB_RET_IF_FAIL(status); + num_selected = static_cast(get_value(policy, d_num_selected_out)); - pa.launch(items, - stencil, - selected_out, - rejected_out, - predicate, - num_items, - num_selected_out, - tile_status, - num_tiles); - CUDA_CUB_RET_IF_FAIL(cudaPeekAtLastError()); return status; - } +}; + +template +THRUST_RUNTIME_FUNCTION std::size_t partition( + execution_policy& policy, + InputIt first, + InputIt last, + StencilIt stencil, + OutputIt output, + Predicate predicate) +{ + using size_type = typename iterator_traits::difference_type; + + size_type num_items = thrust::distance(first, last); + std::size_t num_selected{}; + cudaError_t status = cudaSuccess; + size_t temp_storage_bytes = 0; + + // 32-bit offset-type dispatch + using dispatch32_t = DispatchPartitionIf; + + // 64-bit offset-type dispatch + using dispatch64_t = DispatchPartitionIf; + + // Query temporary storage requirements + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, nullptr, temp_storage_bytes, first, stencil, output, predicate, num_items_fixed, num_selected)); + cuda_cub::throw_on_error(status, "partition failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp(policy, temp_storage_bytes); + void* temp_storage = static_cast(tmp.data().get()); + + // Run algorithm + THRUST_INDEX_TYPE_DISPATCH2( + status, + dispatch32_t::dispatch, + dispatch64_t::dispatch, + num_items, + (policy, temp_storage, temp_storage_bytes, first, stencil, output, predicate, num_items_fixed, num_selected)); + cuda_cub::throw_on_error(status, "partition failed on 2nd step"); + + return num_selected; +} - template - THRUST_RUNTIME_FUNCTION - pair - partition(execution_policy& policy, - InputIt first, - InputIt last, - StencilIt stencil, - SelectedOutIt selected_result, - RejectedOutIt rejected_result, - Predicate predicate) - { - typedef typename iterator_traits::difference_type size_type; - - size_type num_items = static_cast(thrust::distance(first, last)); - size_t temp_storage_bytes = 0; - cudaStream_t stream = cuda_cub::stream(policy); - - cudaError_t status; - status = doit_step(NULL, - temp_storage_bytes, - first, - stencil, - selected_result, - rejected_result, - predicate, - reinterpret_cast(NULL), - num_items, - stream); - cuda_cub::throw_on_error(status, "partition failed on 1st step"); - - size_t allocation_sizes[2] = {sizeof(size_type), temp_storage_bytes}; - void * allocations[2] = {NULL, NULL}; - - size_t storage_size = 0; - - status = core::alias_storage(NULL, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "partition failed on 1st alias_storage"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array - tmp(policy, storage_size); - void *ptr = static_cast(tmp.data().get()); - - status = core::alias_storage(ptr, - storage_size, - allocations, - allocation_sizes); - cuda_cub::throw_on_error(status, "partition failed on 2nd alias_storage"); - - size_type* d_num_selected_out - = thrust::detail::aligned_reinterpret_cast(allocations[0]); - - status = doit_step(allocations[1], - temp_storage_bytes, - first, - stencil, - selected_result, - rejected_result, - predicate, - d_num_selected_out, - num_items, - stream); - cuda_cub::throw_on_error(status, "partition failed on 2nd step"); - - status = cuda_cub::synchronize(policy); - cuda_cub::throw_on_error(status, "partition failed to synchronize"); - - size_type num_selected = 0; - if (num_items > 0) - { - num_selected = get_value(policy, d_num_selected_out); - } - - return thrust::make_pair(selected_result + num_selected, - rejected_result + num_items - num_selected); +template +THRUST_RUNTIME_FUNCTION pair stable_partition_copy( + execution_policy& policy, + InputIt first, + InputIt last, + StencilIt stencil, + SelectedOutIt selected_result, + RejectedOutIt rejected_result, + Predicate predicate) +{ + if(thrust::distance(first, last) <= 0){ + return thrust::make_pair(selected_result, rejected_result); } - template - THRUST_RUNTIME_FUNCTION - Iterator partition_inplace(execution_policy& policy, - Iterator first, - Iterator last, - StencilIt stencil, - Predicate predicate) - { - typedef typename iterator_traits::difference_type size_type; - typedef typename iterator_traits::value_type value_type; - - size_type num_items = thrust::distance(first, last); - - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp(policy, num_items); + using output_it_wrapper_t = cub::detail::partition_distinct_output_t; + std::size_t num_items = static_cast(thrust::distance(first, last)); + std::size_t num_selected = partition( + policy, first, last, stencil, output_it_wrapper_t{selected_result, rejected_result}, predicate); + return thrust::make_pair(selected_result + num_selected, rejected_result + num_items - num_selected); +} - cuda_cub::uninitialized_copy(policy, first, last, tmp.begin()); +template +THRUST_RUNTIME_FUNCTION InputIt inplace_partition( + execution_policy& policy, InputIt first, InputIt last, StencilIt stencil, Predicate predicate) +{ + if(thrust::distance(first, last) <= 0){ + return first; + } - pair result = - partition(policy, - tmp.data().get(), - tmp.data().get() + num_items, - stencil, - first, - single_output_tag(), - predicate); + // Element type of the input iterator + using value_t = typename iterator_traits::value_type; + std::size_t num_items = static_cast(thrust::distance(first, last)); - size_type num_selected = result.first - first; + // Allocate temporary storage, which will serve as the input to the partition + thrust::detail::temporary_array tmp(policy, num_items); + cuda_cub::uninitialized_copy(policy, first, last, tmp.begin()); - return first + num_selected; - } -} // namespace __partition + // Partition input from temporary storage to the user-provided range [`first`, `last`) + std::size_t num_selected = + partition(policy, tmp.data().get(), tmp.data().get() + num_items, stencil, first, predicate); + return first + num_selected; +} -///// copy +} // namespace detail //------------------------- // Thrust API entry points //------------------------- _CCCL_EXEC_CHECK_DISABLE -template -pair _CCCL_HOST_DEVICE -partition_copy(execution_policy &policy, - InputIt first, - InputIt last, - StencilIt stencil, - SelectedOutIt selected_result, - RejectedOutIt rejected_result, - Predicate predicate) +template +pair 
_CCCL_HOST_DEVICE partition_copy( + execution_policy& policy, + InputIt first, + InputIt last, + StencilIt stencil, + SelectedOutIt selected_result, + RejectedOutIt rejected_result, + Predicate predicate) { auto ret = thrust::make_pair(selected_result, rejected_result); THRUST_CDP_DISPATCH( - (ret = __partition::partition(policy, - first, - last, - stencil, - selected_result, - rejected_result, - predicate);), - (ret = thrust::partition_copy(cvt_to_seq(derived_cast(policy)), - first, - last, - stencil, - selected_result, - rejected_result, - predicate);)); + (ret = detail::stable_partition_copy(policy, first, last, stencil, selected_result, rejected_result, predicate);), + (ret = thrust::partition_copy( + cvt_to_seq(derived_cast(policy)), first, last, stencil, selected_result, rejected_result, predicate);)); return ret; } _CCCL_EXEC_CHECK_DISABLE -template -pair _CCCL_HOST_DEVICE -partition_copy(execution_policy &policy, - InputIt first, - InputIt last, - SelectedOutIt selected_result, - RejectedOutIt rejected_result, - Predicate predicate) +template +pair _CCCL_HOST_DEVICE partition_copy( + execution_policy& policy, + InputIt first, + InputIt last, + SelectedOutIt selected_result, + RejectedOutIt rejected_result, + Predicate predicate) { auto ret = thrust::make_pair(selected_result, rejected_result); THRUST_CDP_DISPATCH( - (ret = __partition::partition(policy, - first, - last, - __partition::no_stencil_tag(), - selected_result, - rejected_result, - predicate);), - (ret = thrust::partition_copy(cvt_to_seq(derived_cast(policy)), - first, - last, - selected_result, - rejected_result, - predicate);)); + (ret = detail::stable_partition_copy( + policy, first, last, static_cast(nullptr), selected_result, rejected_result, predicate);), + (ret = thrust::partition_copy( + cvt_to_seq(derived_cast(policy)), first, last, selected_result, rejected_result, predicate);)); return ret; } _CCCL_EXEC_CHECK_DISABLE -template -pair _CCCL_HOST_DEVICE 
-stable_partition_copy(execution_policy &policy, - InputIt first, - InputIt last, - SelectedOutIt selected_result, - RejectedOutIt rejected_result, - Predicate predicate) +template +pair _CCCL_HOST_DEVICE stable_partition_copy( + execution_policy& policy, + InputIt first, + InputIt last, + StencilIt stencil, + SelectedOutIt selected_result, + RejectedOutIt rejected_result, + Predicate predicate) { auto ret = thrust::make_pair(selected_result, rejected_result); THRUST_CDP_DISPATCH( - (ret = __partition::partition(policy, - first, - last, - __partition::no_stencil_tag(), - selected_result, - rejected_result, - predicate);), - (ret = thrust::stable_partition_copy(cvt_to_seq(derived_cast(policy)), - first, - last, - selected_result, - rejected_result, - predicate);)); + (ret = detail::stable_partition_copy(policy, first, last, stencil, selected_result, rejected_result, predicate);), + (ret = thrust::stable_partition_copy( + cvt_to_seq(derived_cast(policy)), first, last, stencil, selected_result, rejected_result, predicate);)); return ret; } _CCCL_EXEC_CHECK_DISABLE -template -pair _CCCL_HOST_DEVICE -stable_partition_copy(execution_policy &policy, - InputIt first, - InputIt last, - StencilIt stencil, - SelectedOutIt selected_result, - RejectedOutIt rejected_result, - Predicate predicate) +template +pair _CCCL_HOST_DEVICE stable_partition_copy( + execution_policy& policy, + InputIt first, + InputIt last, + SelectedOutIt selected_result, + RejectedOutIt rejected_result, + Predicate predicate) { auto ret = thrust::make_pair(selected_result, rejected_result); THRUST_CDP_DISPATCH( - (ret = __partition::partition(policy, - first, - last, - stencil, - selected_result, - rejected_result, - predicate);), - (ret = thrust::stable_partition_copy(cvt_to_seq(derived_cast(policy)), - first, - last, - stencil, - selected_result, - rejected_result, - predicate);)); + (ret = detail::stable_partition_copy( + policy, first, last, static_cast(nullptr), selected_result, rejected_result, 
predicate);), + (ret = thrust::stable_partition_copy( + cvt_to_seq(derived_cast(policy)), first, last, selected_result, rejected_result, predicate);)); return ret; } /// inplace _CCCL_EXEC_CHECK_DISABLE -template +template Iterator _CCCL_HOST_DEVICE -partition(execution_policy &policy, - Iterator first, - Iterator last, - StencilIt stencil, - Predicate predicate) +partition(execution_policy& policy, Iterator first, Iterator last, StencilIt stencil, Predicate predicate) { - THRUST_CDP_DISPATCH( - (last = - __partition::partition_inplace(policy, first, last, stencil, predicate);), - (last = thrust::partition(cvt_to_seq(derived_cast(policy)), - first, - last, - stencil, - predicate);)); + THRUST_CDP_DISPATCH((last = detail::inplace_partition(policy, first, last, stencil, predicate);), + (last = thrust::partition(cvt_to_seq(derived_cast(policy)), first, last, stencil, predicate);)); return last; } _CCCL_EXEC_CHECK_DISABLE -template +template Iterator _CCCL_HOST_DEVICE -partition(execution_policy &policy, - Iterator first, - Iterator last, - Predicate predicate) +partition(execution_policy& policy, Iterator first, Iterator last, Predicate predicate) { THRUST_CDP_DISPATCH( - (last = __partition::partition_inplace(policy, - first, - last, - __partition::no_stencil_tag(), - predicate);), - (last = thrust::partition(cvt_to_seq(derived_cast(policy)), - first, - last, - predicate);)); + (last = detail::inplace_partition(policy, first, last, static_cast(nullptr), predicate);), + (last = thrust::partition(cvt_to_seq(derived_cast(policy)), first, last, predicate);)); return last; } _CCCL_EXEC_CHECK_DISABLE -template -Iterator _CCCL_HOST_DEVICE -stable_partition(execution_policy &policy, - Iterator first, - Iterator last, - StencilIt stencil, - Predicate predicate) +template +Iterator _CCCL_HOST_DEVICE stable_partition( + execution_policy& policy, Iterator first, Iterator last, StencilIt stencil, Predicate predicate) { auto ret = last; THRUST_CDP_DISPATCH( - (ret = - 
__partition::partition_inplace(policy, first, last, stencil, predicate); + (ret = detail::inplace_partition(policy, first, last, stencil, predicate); /* partition returns rejected values in reverse order so reverse the rejected elements to make it stable */ cuda_cub::reverse(policy, ret, last);), - (ret = thrust::stable_partition(cvt_to_seq(derived_cast(policy)), - first, - last, - stencil, - predicate);)); + (ret = thrust::stable_partition(cvt_to_seq(derived_cast(policy)), first, last, stencil, predicate);)); return ret; } _CCCL_EXEC_CHECK_DISABLE -template +template Iterator _CCCL_HOST_DEVICE -stable_partition(execution_policy &policy, - Iterator first, - Iterator last, - Predicate predicate) +stable_partition(execution_policy& policy, Iterator first, Iterator last, Predicate predicate) { auto ret = last; THRUST_CDP_DISPATCH( - (ret = __partition::partition_inplace(policy, - first, - last, - __partition::no_stencil_tag(), - predicate); + (ret = detail::inplace_partition(policy, first, last, static_cast(nullptr), predicate); /* partition returns rejected values in reverse order so reverse the rejected elements to make it stable */ cuda_cub::reverse(policy, ret, last);), - (ret = thrust::stable_partition(cvt_to_seq(derived_cast(policy)), - first, - last, - predicate);)); + (ret = thrust::stable_partition(cvt_to_seq(derived_cast(policy)), first, last, predicate);)); return ret; } From bc6c83b0cd5387cd798611467e18b5e8c23dae8b Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 14:14:38 +0000 Subject: [PATCH 14/44] Fix include of with NVC++ --- thrust/detail/cstdint.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/thrust/detail/cstdint.h b/thrust/detail/cstdint.h index f41e11475..bd4c79090 100644 --- a/thrust/detail/cstdint.h +++ b/thrust/detail/cstdint.h @@ -18,9 +18,7 @@ #include -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) || \ - (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_CLANG) || \ - 
(THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_INTEL) +#if (THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC) #include #endif From 489c0739dc5966370fb3cd9104a2058c1fc53e3f Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 15:10:32 +0000 Subject: [PATCH 15/44] Cleanup diagnostic handling --- thrust/detail/allocator/allocator_traits.h | 4 ++ thrust/detail/config/compiler.h | 5 +- thrust/detail/config/config.h | 1 + thrust/detail/config/diagnostic.h | 80 ++++++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 thrust/detail/config/diagnostic.h diff --git a/thrust/detail/allocator/allocator_traits.h b/thrust/detail/allocator/allocator_traits.h index 6e7c4a6da..37dedc47b 100644 --- a/thrust/detail/allocator/allocator_traits.h +++ b/thrust/detail/allocator/allocator_traits.h @@ -72,6 +72,8 @@ template typedef thrust::detail::integral_constant type; }; +THRUST_SUPPRESS_DEPRECATED_PUSH + // The following fields of std::allocator have been deprecated (since C++17). // There's no way to detect it other than explicit specialization. #if THRUST_CPP_DIALECT >= 2017 @@ -178,6 +180,8 @@ template static const bool value = type::value; }; +THRUST_SUPPRESS_DEPRECATED_POP + template::value> struct rebind_alloc { diff --git a/thrust/detail/config/compiler.h b/thrust/detail/config/compiler.h index b9a2f71b7..9330c699d 100644 --- a/thrust/detail/config/compiler.h +++ b/thrust/detail/config/compiler.h @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2020 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2020-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,6 +35,7 @@ #define THRUST_HOST_COMPILER_GCC 2 #define THRUST_HOST_COMPILER_CLANG 3 #define THRUST_HOST_COMPILER_INTEL 4 +#define THRUST_HOST_COMPILER_NVHPC 5 // enumerate device compilers we know about #define THRUST_DEVICE_COMPILER_UNKNOWN 0 @@ -68,6 +69,8 @@ #else #define THRUST_LEGACY_GCC #endif +#elif defined(__NVCOMPILER) + #define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_NVHPC #else #define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_UNKNOWN #endif // THRUST_HOST_COMPILER diff --git a/thrust/detail/config/config.h b/thrust/detail/config/config.h index d56878e9e..61a5a3b9d 100644 --- a/thrust/detail/config/config.h +++ b/thrust/detail/config/config.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/thrust/detail/config/diagnostic.h b/thrust/detail/config/diagnostic.h new file mode 100644 index 000000000..7fe376907 --- /dev/null +++ b/thrust/detail/config/diagnostic.h @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// Part of libcu++, the C++ Standard Library for your entire system, +// under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +// SPDX-FileCopyrightText: Modifications Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+// +//===----------------------------------------------------------------------===// + +#ifndef THRUST_DETAIL_CONFIG_DIAGNOSTIC_H +#define THRUST_DETAIL_CONFIG_DIAGNOSTIC_H + +#include + +// Enable us to selectively silence host compiler warnings +#define THRUST_TOSTRING2(_STR) #_STR +#define THRUST_TOSTRING(_STR) THRUST_TOSTRING2(_STR) +#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_CLANG +# define THRUST_DIAG_PUSH _Pragma("clang diagnostic push") +# define THRUST_DIAG_POP _Pragma("clang diagnostic pop") +# define THRUST_DIAG_SUPPRESS_CLANG(str) _Pragma(THRUST_TOSTRING(clang diagnostic ignored str)) +# define THRUST_DIAG_SUPPRESS_GCC(str) +# define THRUST_DIAG_SUPPRESS_NVHPC(str) +# define THRUST_DIAG_SUPPRESS_MSVC(str) +#elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) || (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_INTEL) +# define THRUST_DIAG_PUSH _Pragma("GCC diagnostic push") +# define THRUST_DIAG_POP _Pragma("GCC diagnostic pop") +# define THRUST_DIAG_SUPPRESS_CLANG(str) +# define THRUST_DIAG_SUPPRESS_GCC(str) _Pragma(THRUST_TOSTRING(GCC diagnostic ignored str)) +# define THRUST_DIAG_SUPPRESS_NVHPC(str) +# define THRUST_DIAG_SUPPRESS_MSVC(str) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_NVHPC +# define THRUST_DIAG_PUSH _Pragma("diagnostic push") +# define THRUST_DIAG_POP _Pragma("diagnostic pop") +# define THRUST_DIAG_SUPPRESS_CLANG(str) +# define THRUST_DIAG_SUPPRESS_GCC(str) +# define THRUST_DIAG_SUPPRESS_NVHPC(str) _Pragma(THRUST_TOSTRING(diag_suppress str)) +# define THRUST_DIAG_SUPPRESS_MSVC(str) +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +# define THRUST_DIAG_PUSH __pragma(warning(push)) +# define THRUST_DIAG_POP __pragma(warning(pop)) +# define THRUST_DIAG_SUPPRESS_CLANG(str) +# define THRUST_DIAG_SUPPRESS_GCC(str) +# define THRUST_DIAG_SUPPRESS_NVHPC(str) +# define THRUST_DIAG_SUPPRESS_MSVC(str) __pragma(warning(disable : str)) +#else +# define THRUST_DIAG_PUSH +# define THRUST_DIAG_POP +# define 
THRUST_DIAG_SUPPRESS_CLANG(str) +# define THRUST_DIAG_SUPPRESS_GCC(str) +# define THRUST_DIAG_SUPPRESS_NVHPC(str) +# define THRUST_DIAG_SUPPRESS_MSVC(str) +#endif + +// Convenient shortcuts to silence common warnings +#if THRUST_HOST_COMPILER == THRUST_DEVICE_COMPILER_CLANG +# define THRUST_SUPPRESS_DEPRECATED_PUSH \ + THRUST_DIAG_PUSH \ + THRUST_DIAG_SUPPRESS_CLANG("-Wdeprecated") \ + THRUST_DIAG_SUPPRESS_CLANG("-Wdeprecated-declarations") +# define THRUST_SUPPRESS_DEPRECATED_POP THRUST_DIAG_POP +#elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) || (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_ICC) +# define THRUST_SUPPRESS_DEPRECATED_PUSH \ + THRUST_DIAG_PUSH \ + THRUST_DIAG_SUPPRESS_GCC("-Wdeprecated") \ + THRUST_DIAG_SUPPRESS_GCC("-Wdeprecated-declarations") +# define THRUST_SUPPRESS_DEPRECATED_POP THRUST_DIAG_POP +#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC +# define THRUST_SUPPRESS_DEPRECATED_PUSH \ + THRUST_DIAG_PUSH \ + THRUST_DIAG_SUPPRESS_MSVC(4996) +# define THRUST_SUPPRESS_DEPRECATED_POP THRUST_DIAG_POP +#else // !THRUST_COMPILER_CLANG && !THRUST_COMPILER_GCC +# define THRUST_SUPPRESS_DEPRECATED_PUSH +# define THRUST_SUPPRESS_DEPRECATED_POP +#endif // !THRUST_COMPILER_CLANG && !THRUST_COMPILER_GCC + +#endif // THRUST_DETAIL_CONFIG_DIAGNOSTIC_H \ No newline at end of file From 9f5a3ba323cffd3fcad0b53eec4ba3b9e521abf4 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Mon, 29 Jul 2024 14:09:57 +0000 Subject: [PATCH 16/44] Rework config.h --- thrust/detail/config/config.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/thrust/detail/config/config.h b/thrust/detail/config/config.h index 61a5a3b9d..95285631c 100644 --- a/thrust/detail/config/config.h +++ b/thrust/detail/config/config.h @@ -20,19 +20,25 @@ #pragma once +#if !defined(__HIP__) +// For _CCCL_IMPLICIT_SYSTEM_HEADER +#include +#endif + // NOTE: The order of these #includes matters. 
-#include #include -#include #include +#include #include +#include // host_system.h & device_system.h must be #included as early as possible // because other config headers depend on it #include + +#include #include #include #include -#include #include #include From 1020a118f8998dfcb12c1b6da811f04678e2c96d Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 15:15:23 +0000 Subject: [PATCH 17/44] Bump version to 2.4.0 --- .gitlab-ci.yml | 2 +- thrust/cmake/thrust-config-version.cmake | 4 ++-- thrust/version.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 775b3b026..777fc576b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -340,7 +340,7 @@ build:cuda-and-omp: tags: - build variables: - CCCL_GIT_BRANCH: v2.3.2 + CCCL_GIT_BRANCH: v2.4.0 CCCL_DIR: ${CI_PROJECT_DIR}/cccl needs: [] script: diff --git a/thrust/cmake/thrust-config-version.cmake b/thrust/cmake/thrust-config-version.cmake index 990c0f3fb..3f78da812 100644 --- a/thrust/cmake/thrust-config-version.cmake +++ b/thrust/cmake/thrust-config-version.cmake @@ -2,8 +2,8 @@ include("${CMAKE_CURRENT_LIST_DIR}/thrust-header-search.cmake") set(THRUST_VERSION_MAJOR 2) -set(THRUST_VERSION_MINOR 3) -set(THRUST_VERSION_PATCH 2) # Thrust: "subminor" CMake: "patch" +set(THRUST_VERSION_MINOR 4) +set(THRUST_VERSION_PATCH 0) # Thrust: "subminor" CMake: "patch" set(THRUST_VERSION_TWEAK 0) set(THRUST_VERSION "${THRUST_VERSION_MAJOR}.${THRUST_VERSION_MINOR}.${THRUST_VERSION_PATCH}.${THRUST_VERSION_TWEAK}") diff --git a/thrust/version.h b/thrust/version.h index 11f2aaef9..570660d86 100644 --- a/thrust/version.h +++ b/thrust/version.h @@ -53,7 +53,7 @@ * THRUST_VERSION / 100 % 1000 is the minor version. * THRUST_VERSION / 100000 is the major version. */ -#define THRUST_VERSION 200302 +#define THRUST_VERSION 200400 /*! 
\def THRUST_MAJOR_VERSION * \brief The preprocessor macro \p THRUST_MAJOR_VERSION encodes the From 917c255941edd02154b970989622996f3505fcd1 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 15:24:48 +0000 Subject: [PATCH 18/44] Fix issues with ambiguous calls to addressof in thrust::optional --- thrust/optional.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/thrust/optional.h b/thrust/optional.h index 68c638e1c..5ba7f07ee 100644 --- a/thrust/optional.h +++ b/thrust/optional.h @@ -1612,11 +1612,11 @@ class optional : private detail::optional_move_assign_base, using thrust::swap; swap(**this, *rhs); } else { - new (addressof(rhs.m_value)) T(std::move(this->m_value)); + new (thrust::addressof(rhs.m_value)) T(std::move(this->m_value)); this->m_value.T::~T(); } } else if (rhs.has_value()) { - new (addressof(this->m_value)) T(std::move(rhs.m_value)); + new (thrust::addressof(this->m_value)) T(std::move(rhs.m_value)); rhs.m_value.T::~T(); } } @@ -1628,7 +1628,7 @@ class optional : private detail::optional_move_assign_base, THRUST_EXEC_CHECK_DISABLE THRUST_HOST_DEVICE constexpr const T *operator->() const { - return addressof(this->m_value); + return thrust::addressof(this->m_value); } /// \group pointer @@ -1636,7 +1636,7 @@ class optional : private detail::optional_move_assign_base, THRUST_EXEC_CHECK_DISABLE THRUST_HOST_DEVICE THRUST_OPTIONAL_CPP11_CONSTEXPR T *operator->() { - return addressof(this->m_value); + return thrust::addressof(this->m_value); } /// \return the stored value @@ -2682,7 +2682,7 @@ template class optional { detail::enable_if_t>::value> * = nullptr> THRUST_HOST_DEVICE - constexpr optional(U &&u) : m_value(addressof(u)) { + constexpr optional(U &&u) : m_value(thrust::addressof(u)) { static_assert(std::is_lvalue_reference::value, "U must be an lvalue"); } @@ -2724,7 +2724,7 @@ template class optional { THRUST_HOST_DEVICE optional &operator=(U &&u) { 
static_assert(std::is_lvalue_reference::value, "U must be an lvalue"); - m_value = addressof(u); + m_value = thrust::addressof(u); return *this; } @@ -2736,7 +2736,7 @@ template class optional { template THRUST_HOST_DEVICE optional &operator=(const optional &rhs) { - m_value = addressof(rhs.value()); + m_value = thrust::addressof(rhs.value()); return *this; } @@ -2748,7 +2748,7 @@ template class optional { template THRUST_HOST_DEVICE T &emplace(U& u) noexcept { - m_value = addressof(u); + m_value = thrust::addressof(u); return *m_value; } From 5af1ef79951bbc29290c07f8244569f4bf98ef62 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Mon, 29 Jul 2024 14:42:44 +0000 Subject: [PATCH 19/44] Try harder to unwrap nested thrust::tuple_of_iterator_references, CUDA backend --- testing/zip_function.cu | 128 +++++++++++-- .../detail/tuple_of_iterator_references.h | 179 ++++++++++-------- 2 files changed, 208 insertions(+), 99 deletions(-) diff --git a/testing/zip_function.cu b/testing/zip_function.cu index a1545a1a1..16332fb77 100644 --- a/testing/zip_function.cu +++ b/testing/zip_function.cu @@ -2,29 +2,30 @@ #if THRUST_CPP_DIALECT >= 2011 && !defined(THRUST_LEGACY_GCC) -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include -#include +# include + +# include using namespace unittest; struct SumThree { template - __host__ __device__ - auto operator()(T1 x, T2 y, T3 z) const - THRUST_DECLTYPE_RETURNS(x + y + z) + __host__ __device__ auto operator()(T1 x, T2 y, T3 z) const THRUST_DECLTYPE_RETURNS(x + y + z) }; // end SumThree struct SumThreeTuple { template - __host__ __device__ - auto operator()(Tuple x) const - THRUST_DECLTYPE_RETURNS(thrust::get<0>(x) + thrust::get<1>(x) + thrust::get<2>(x)) + __host__ __device__ auto operator()(Tuple x) const + THRUST_DECLTYPE_RETURNS(thrust::get<0>(x) + thrust::get<1>(x) + thrust::get<2>(x)) }; // end SumThreeTuple template @@ -42,22 +43,22 @@ struct TestZipFunctionTransform 
device_vector d_data1 = h_data1; device_vector d_data2 = h_data2; - host_vector h_result_tuple(n); - host_vector h_result_zip(n); + host_vector h_result_tuple(n); + host_vector h_result_zip(n); device_vector d_result_zip(n); // Tuple base case transform(make_zip_iterator(make_tuple(h_data0.begin(), h_data1.begin(), h_data2.begin())), - make_zip_iterator(make_tuple(h_data0.end(), h_data1.end(), h_data2.end())), + make_zip_iterator(make_tuple(h_data0.end(), h_data1.end(), h_data2.end())), h_result_tuple.begin(), SumThreeTuple{}); // Zip Function transform(make_zip_iterator(make_tuple(h_data0.begin(), h_data1.begin(), h_data2.begin())), - make_zip_iterator(make_tuple(h_data0.end(), h_data1.end(), h_data2.end())), + make_zip_iterator(make_tuple(h_data0.end(), h_data1.end(), h_data2.end())), h_result_zip.begin(), make_zip_function(SumThree{})); transform(make_zip_iterator(make_tuple(d_data0.begin(), d_data1.begin(), d_data2.begin())), - make_zip_iterator(make_tuple(d_data0.end(), d_data1.end(), d_data2.end())), + make_zip_iterator(make_tuple(d_data0.end(), d_data1.end(), d_data2.end())), d_result_zip.begin(), make_zip_function(SumThree{})); @@ -67,4 +68,99 @@ struct TestZipFunctionTransform }; VariableUnitTest TestZipFunctionTransformInstance; +struct RemovePred +{ + __host__ __device__ bool operator()(const thrust::tuple& ele1, const float&) + { + return thrust::get<0>(ele1) == thrust::get<1>(ele1); + } +}; +template +struct TestZipFunctionMixed +{ + void operator()() + { + thrust::device_vector vecA{0, 0, 2, 0}; + thrust::device_vector vecB{0, 2, 2, 2}; + thrust::device_vector vecC{88.0f, 88.0f, 89.0f, 89.0f}; + thrust::device_vector expected{88.0f, 89.0f}; + + auto inputKeyItBegin = + thrust::make_zip_iterator(thrust::make_zip_iterator(vecA.begin(), vecB.begin()), vecC.begin()); + auto endIt = + thrust::remove_if(inputKeyItBegin, inputKeyItBegin + vecA.size(), thrust::make_zip_function(RemovePred{})); + auto numEle = endIt - inputKeyItBegin; + vecA.resize(numEle); + 
vecB.resize(numEle); + vecC.resize(numEle); + + ASSERT_EQUAL(numEle, 2); + ASSERT_EQUAL(vecC, expected); + } +}; +SimpleUnitTest > TestZipFunctionMixedInstance; + +struct NestedFunctionCall +{ + __host__ __device__ bool + operator()(const thrust::tuple, thrust::tuple>>& idAndPt) + { + thrust::tuple, thrust::tuple> ele1 = thrust::get<1>(idAndPt); + thrust::tuple p1 = thrust::get<0>(ele1); + thrust::tuple p2 = thrust::get<1>(ele1); + return thrust::get<0>(p1) == thrust::get<0>(p2) || thrust::get<1>(p1) == thrust::get<1>(p2); + } +}; + +template +struct TestNestedZipFunction +{ + void operator()() + { + thrust::device_vector PX{0, 1, 2, 3}; + thrust::device_vector PY{0, 1, 2, 2}; + thrust::device_vector SS{0, 1, 2}; + thrust::device_vector ST{1, 2, 3}; + thrust::device_vector vecC{88.0f, 88.0f, 89.0f, 89.0f}; + + auto segIt = thrust::make_zip_iterator( + thrust::make_zip_iterator(thrust::make_permutation_iterator(PX.begin(), SS.begin()), + thrust::make_permutation_iterator(PY.begin(), SS.begin())), + thrust::make_zip_iterator(thrust::make_permutation_iterator(PX.begin(), ST.begin()), + thrust::make_permutation_iterator(PY.begin(), ST.begin()))); + auto idAndSegIt = thrust::make_zip_iterator(thrust::make_counting_iterator(0u), segIt); + + thrust::device_vector isMH{false, false, false}; + thrust::device_vector expected{false, false, true}; + thrust::transform(idAndSegIt, idAndSegIt + SS.size(), isMH.begin(), NestedFunctionCall{}); + ASSERT_EQUAL(isMH, expected); + } +}; +SimpleUnitTest > TestNestedZipFunctionInstance; + +struct SortPred { + __device__ __forceinline__ + bool operator()(const thrust::tuple, int>& a, + const thrust::tuple, int>& b) { + return thrust::get<1>(a) < thrust::get<1>(b); + } +}; +#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA +template +struct TestNestedZipFunction2 +{ + void operator()() + { + thrust::device_vector A(5); + thrust::device_vector B(5); + thrust::device_vector C(5); + auto n = A.size(); + + auto tupleIt = 
thrust::make_zip_iterator(cuda::std::begin(A), cuda::std::begin(B)); + auto nestedTupleIt = thrust::make_zip_iterator(tupleIt, cuda::std::begin(C)); + thrust::sort(nestedTupleIt, nestedTupleIt + n, SortPred{}); + } +}; +SimpleUnitTest > TestNestedZipFunctionInstance2; +#endif // THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA #endif // THRUST_CPP_DIALECT diff --git a/thrust/iterator/detail/tuple_of_iterator_references.h b/thrust/iterator/detail/tuple_of_iterator_references.h index 0b46b111e..5934dc946 100644 --- a/thrust/iterator/detail/tuple_of_iterator_references.h +++ b/thrust/iterator/detail/tuple_of_iterator_references.h @@ -18,113 +18,125 @@ #pragma once #include -#include -#include -#include #include +#include +#include +#include #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA -#include #include +#include THRUST_NAMESPACE_BEGIN namespace detail { -template< - typename... Ts -> - class tuple_of_iterator_references : public thrust::tuple +template +class tuple_of_iterator_references; + +template +struct maybe_unwrap_nested { + THRUST_HOST_DEVICE U operator()(const T& t) const { + return t; + } +}; + +template +struct maybe_unwrap_nested, tuple_of_iterator_references> { + THRUST_HOST_DEVICE thrust::tuple operator()(const tuple_of_iterator_references& t) const { + return t.template __to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); + } +}; + +template < typename... 
Ts > +class tuple_of_iterator_references : public thrust::tuple { - public: - using super_t = thrust::tuple; - using super_t::super_t; +public: + using super_t = thrust::tuple; + using super_t::super_t; - inline THRUST_HOST_DEVICE - tuple_of_iterator_references() + inline THRUST_HOST_DEVICE tuple_of_iterator_references() : super_t() - {} + {} - // allow implicit construction from tuple - inline THRUST_HOST_DEVICE - tuple_of_iterator_references(const super_t& other) + // allow implicit construction from tuple + inline THRUST_HOST_DEVICE tuple_of_iterator_references(const super_t& other) : super_t(other) - {} + {} - inline THRUST_HOST_DEVICE - tuple_of_iterator_references(super_t&& other) + inline THRUST_HOST_DEVICE tuple_of_iterator_references(super_t&& other) : super_t(::cuda::std::move(other)) - {} - - // allow assignment from tuples - // XXX might be worthwhile to guard this with an enable_if is_assignable - THRUST_EXEC_CHECK_DISABLE - template - inline THRUST_HOST_DEVICE - tuple_of_iterator_references &operator=(const thrust::tuple &other) - { - super_t::operator=(other); - return *this; - } - - // allow assignment from pairs - // XXX might be worthwhile to guard this with an enable_if is_assignable - THRUST_EXEC_CHECK_DISABLE - template - inline THRUST_HOST_DEVICE - tuple_of_iterator_references &operator=(const thrust::pair &other) - { - super_t::operator=(other); - return *this; - } - - // allow assignment from reference - // XXX perhaps we should generalize to reference - // we could captures reference this way - THRUST_EXEC_CHECK_DISABLE - template - inline THRUST_HOST_DEVICE - tuple_of_iterator_references& - operator=(const thrust::reference, Pointer, Derived> &other) - { - typedef thrust::tuple tuple_type; - - // XXX perhaps this could be accelerated - super_t::operator=(tuple_type{other}); - return *this; - } - - template = 0> - inline THRUST_HOST_DEVICE - constexpr operator thrust::tuple() const { - return to_tuple(typename 
::cuda::std::__make_tuple_indices::type{}); - } - - // this overload of swap() permits swapping tuple_of_iterator_references returned as temporaries from - // iterator dereferences - template - inline THRUST_HOST_DEVICE - friend void swap(tuple_of_iterator_references&& x, tuple_of_iterator_references&& y) - { - x.swap(y); - } - -private: - template - inline THRUST_HOST_DEVICE - constexpr thrust::tuple to_tuple(::cuda::std::__tuple_indices) const { - return {get(*this)...}; - } + {} + + // allow assignment from tuples + // XXX might be worthwhile to guard this with an enable_if is_assignable + THRUST_EXEC_CHECK_DISABLE + template + inline THRUST_HOST_DEVICE tuple_of_iterator_references& operator=(const thrust::tuple& other) + { + super_t::operator=(other); + return *this; + } + + // allow assignment from pairs + // XXX might be worthwhile to guard this with an enable_if is_assignable + THRUST_EXEC_CHECK_DISABLE + template + inline THRUST_HOST_DEVICE tuple_of_iterator_references& operator=(const thrust::pair& other) + { + super_t::operator=(other); + return *this; + } + + // allow assignment from reference + // XXX perhaps we should generalize to reference + // we could captures reference this way + THRUST_EXEC_CHECK_DISABLE + template + inline THRUST_HOST_DEVICE tuple_of_iterator_references& + operator=(const thrust::reference, Pointer, Derived>& other) + { + typedef thrust::tuple tuple_type; + + // XXX perhaps this could be accelerated + super_t::operator=(tuple_type{other}); + return *this; + } + + template = 0> + inline THRUST_HOST_DEVICE constexpr operator thrust::tuple() const + { + return __to_tuple(typename ::cuda::std::__make_tuple_indices::type{}); + } + + // this overload of swap() permits swapping tuple_of_iterator_references returned as temporaries from + // iterator dereferences + template + inline THRUST_HOST_DEVICE friend void swap(tuple_of_iterator_references&& x, tuple_of_iterator_references&& y) + { + x.swap(y); + } + + template + inline 
THRUST_HOST_DEVICE constexpr thrust::tuple __to_tuple(::cuda::std::__tuple_indices) const + { + return {maybe_unwrap_nested{}(get(*this))...}; + } }; -} // end detail +} // namespace detail THRUST_NAMESPACE_END _LIBCUDACXX_BEGIN_NAMESPACE_STD +template +struct __is_tuple_of_iterator_references> + : integral_constant +{}; + // define tuple_size, tuple_element, etc. template struct tuple_size> @@ -139,7 +151,8 @@ struct tuple_element struct tuple_size> From bd5228c463b87ab54128a40a59084a67a5d82b94 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 15:43:17 +0000 Subject: [PATCH 20/44] Added missing element from thrust's tuple implementation --- testing/tuple.cu | 21 +++++++++++---------- thrust/tuple.h | 22 +++++++++++++--------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/testing/tuple.cu b/testing/tuple.cu index 84aa209bf..af57dbf47 100644 --- a/testing/tuple.cu +++ b/testing/tuple.cu @@ -525,13 +525,14 @@ DECLARE_UNITTEST(TestTupleStructuredBindings); #endif // Ensure that we are backwards compatible with the old thrust::tuple implementation -static_assert(thrust::tuple_size>::value == 0, ""); -static_assert(thrust::tuple_size>::value == 1, ""); -static_assert(thrust::tuple_size>::value == 2, ""); -static_assert(thrust::tuple_size>::value == 3, ""); -static_assert(thrust::tuple_size>::value == 4, ""); -static_assert(thrust::tuple_size>::value == 5, ""); -static_assert(thrust::tuple_size>::value == 6, ""); -static_assert(thrust::tuple_size>::value == 7, ""); -static_assert(thrust::tuple_size>::value == 8, ""); -static_assert(thrust::tuple_size>::value == 9, ""); +static_assert(thrust::tuple_size>::value == 0, ""); +static_assert(thrust::tuple_size>::value == 1, ""); +static_assert(thrust::tuple_size>::value == 2, ""); +static_assert(thrust::tuple_size>::value == 3, ""); +static_assert(thrust::tuple_size>::value == 4, ""); +static_assert(thrust::tuple_size>::value == 5, ""); +static_assert(thrust::tuple_size>::value == 
6, ""); +static_assert(thrust::tuple_size>::value == 7, ""); +static_assert(thrust::tuple_size>::value == 8, ""); +static_assert(thrust::tuple_size>::value == 9, ""); +static_assert(thrust::tuple_size>::value == 10, ""); diff --git a/thrust/tuple.h b/thrust/tuple.h index 6f1ca3441..2a5da7d42 100644 --- a/thrust/tuple.h +++ b/thrust/tuple.h @@ -35,6 +35,7 @@ #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA #include +#include #include #include @@ -156,31 +157,34 @@ THRUST_NAMESPACE_END _LIBCUDACXX_BEGIN_NAMESPACE_STD template<> -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; template -struct tuple_size> : tuple_size> {}; +struct tuple_size> : tuple_size> {}; + +template +struct tuple_size> : tuple_size> {}; _LIBCUDACXX_END_NAMESPACE_STD From 099a9019d14962f05442dcd2baa4a60a20073a59 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 25 Jul 2024 16:06:56 +0000 Subject: [PATCH 21/44] Ensure that we can run reduce_by_key with const inputs --- test/test_zip_iterator_reduce_by_key.cpp | 49 ++++++++++++++++++++++- testing/zip_iterator_reduce_by_key.cu | 47 +++++++++++++++++++++- thrust/system/cuda/detail/reduce_by_key.h | 4 +- thrust/system/hip/detail/reduce_by_key.h | 2 +- 4 files changed, 97 insertions(+), 5 deletions(-) diff --git a/test/test_zip_iterator_reduce_by_key.cpp b/test/test_zip_iterator_reduce_by_key.cpp index 
4bbfa1574..c543c263d 100644 --- a/test/test_zip_iterator_reduce_by_key.cpp +++ b/test/test_zip_iterator_reduce_by_key.cpp @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -156,6 +156,53 @@ TYPED_TEST(ZipIteratorReduceByKeyTests, TestZipIteratorReduceByKey) ASSERT_EQ(h_data5, d_data5); ASSERT_EQ(h_data6, d_data6); } + + // const inputs + { + thrust::host_vector h_data3(size, 0.0f); + thrust::host_vector h_data4(size, 0); + thrust::host_vector h_data5(size, 0); + thrust::host_vector h_data6(size, 0.0f); + thrust::device_vector d_data3(size, 0.0f); + thrust::device_vector d_data4(size, 0); + thrust::device_vector d_data5(size, 0); + thrust::device_vector d_data6(size, 0.0f); + + // run on host + const T* h_begin1 = thrust::raw_pointer_cast(h_data1.data()); + const T* h_begin2 = thrust::raw_pointer_cast(h_data2.data()); + const float* h_begin3 = thrust::raw_pointer_cast(h_data3.data()); + T* h_begin4 = thrust::raw_pointer_cast(h_data4.data()); + T* h_begin5 = thrust::raw_pointer_cast(h_data5.data()); + float* h_begin6 = thrust::raw_pointer_cast(h_data6.data()); + thrust::reduce_by_key( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(h_begin1, h_begin2)), + thrust::make_zip_iterator(thrust::make_tuple(h_begin1, h_begin2)) + size, + h_begin3, + thrust::make_zip_iterator(thrust::make_tuple(h_begin4, h_begin5)), + h_begin6); + + // run on device + const T* d_begin1 = thrust::raw_pointer_cast(d_data1.data()); + const T* d_begin2 = thrust::raw_pointer_cast(d_data2.data()); + const float* d_begin3 = thrust::raw_pointer_cast(d_data3.data()); + T* d_begin4 = thrust::raw_pointer_cast(d_data4.data()); + T* d_begin5 = 
thrust::raw_pointer_cast(d_data5.data()); + float* d_begin6 = thrust::raw_pointer_cast(d_data6.data()); + thrust::reduce_by_key( + thrust::device, + thrust::make_zip_iterator(thrust::make_tuple(d_begin1, d_begin2)), + thrust::make_zip_iterator(thrust::make_tuple(d_begin1, d_begin2)) + size, + d_begin3, + thrust::make_zip_iterator(thrust::make_tuple(d_begin4, d_begin5)), + d_begin6); + + ASSERT_EQ(h_data3, d_data3); + ASSERT_EQ(h_data4, d_data4); + ASSERT_EQ(h_data5, d_data5); + ASSERT_EQ(h_data6, d_data6); + } } } } diff --git a/testing/zip_iterator_reduce_by_key.cu b/testing/zip_iterator_reduce_by_key.cu index e3fc99d66..9076bcba5 100644 --- a/testing/zip_iterator_reduce_by_key.cu +++ b/testing/zip_iterator_reduce_by_key.cu @@ -69,7 +69,7 @@ struct TestZipIteratorReduceByKey ASSERT_EQUAL(h_data4, d_data4); ASSERT_EQUAL(h_data5, d_data5); } - + // The tests below get miscompiled on Tesla hw for 8b types #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA @@ -118,6 +118,51 @@ struct TestZipIteratorReduceByKey ASSERT_EQUAL(h_data5, d_data5); ASSERT_EQUAL(h_data6, d_data6); } + + // const inputs, see #1527 + { + host_vector h_data3(n, 0.0f); + host_vector h_data4(n, 0); + host_vector h_data5(n, 0); + host_vector h_data6(n, 0.0f); + device_vector d_data3(n, 0.0f); + device_vector d_data4(n, 0); + device_vector d_data5(n, 0); + device_vector d_data6(n, 0.0f); + + // run on host + const T* h_begin1 = thrust::raw_pointer_cast(h_data1.data()); + const T* h_begin2 = thrust::raw_pointer_cast(h_data2.data()); + const float* h_begin3 = thrust::raw_pointer_cast(h_data3.data()); + T* h_begin4 = thrust::raw_pointer_cast(h_data4.data()); + T* h_begin5 = thrust::raw_pointer_cast(h_data5.data()); + float* h_begin6 = thrust::raw_pointer_cast(h_data6.data()); + thrust::reduce_by_key(thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(h_begin1, h_begin2)), + thrust::make_zip_iterator(thrust::make_tuple(h_begin1, h_begin2)) + n, + h_begin3, + 
thrust::make_zip_iterator(thrust::make_tuple(h_begin4, h_begin5)), + h_begin6); + + // run on device + const T* d_begin1 = thrust::raw_pointer_cast(d_data1.data()); + const T* d_begin2 = thrust::raw_pointer_cast(d_data2.data()); + const float* d_begin3 = thrust::raw_pointer_cast(d_data3.data()); + T* d_begin4 = thrust::raw_pointer_cast(d_data4.data()); + T* d_begin5 = thrust::raw_pointer_cast(d_data5.data()); + float* d_begin6 = thrust::raw_pointer_cast(d_data6.data()); + thrust::reduce_by_key(thrust::device, + thrust::make_zip_iterator(thrust::make_tuple(d_begin1, d_begin2)), + thrust::make_zip_iterator(thrust::make_tuple(d_begin1, d_begin2)) + n, + d_begin3, + thrust::make_zip_iterator(thrust::make_tuple(d_begin4, d_begin5)), + d_begin6); + + ASSERT_EQUAL(h_data3, d_data3); + ASSERT_EQUAL(h_data4, d_data4); + ASSERT_EQUAL(h_data5, d_data5); + ASSERT_EQUAL(h_data6, d_data6); + } } }; VariableUnitTest TestZipIteratorReduceByKeyInstance; diff --git a/thrust/system/cuda/detail/reduce_by_key.h b/thrust/system/cuda/detail/reduce_by_key.h index c17c2a312..48d944f74 100644 --- a/thrust/system/cuda/detail/reduce_by_key.h +++ b/thrust/system/cuda/detail/reduce_by_key.h @@ -680,7 +680,7 @@ namespace __reduce_by_key { } key_type tile_pred_key = (threadIdx.x == 0) - ? keys_load_it[tile_offset - 1] + ? 
key_type(keys_load_it[tile_offset - 1]) : key_type(); sync_threadblock(); @@ -1049,7 +1049,7 @@ namespace __reduce_by_key { status = cuda_cub::synchronize(policy); cuda_cub::throw_on_error(status, "reduce_by_key: failed to synchronize"); - int num_runs_out = cuda_cub::get_value(policy, d_num_runs_out); + const auto num_runs_out = cuda_cub::get_value(policy, d_num_runs_out); return thrust::make_pair( keys_output + num_runs_out, diff --git a/thrust/system/hip/detail/reduce_by_key.h b/thrust/system/hip/detail/reduce_by_key.h index 7538336ef..de9f03384 100644 --- a/thrust/system/hip/detail/reduce_by_key.h +++ b/thrust/system/hip/detail/reduce_by_key.h @@ -222,7 +222,7 @@ namespace __reduce_by_key debug_sync), "reduce_by_key failed on 2nd step"); - size_type num_runs_out = hip_rocprim::get_value(policy, d_num_runs_out); + const auto num_runs_out = hip_rocprim::get_value(policy, d_num_runs_out); return thrust::make_pair(keys_output + num_runs_out, values_output + num_runs_out); } From 95084701da86c81228dd489f6ccec2308bd18575 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Tue, 30 Jul 2024 13:23:40 +0000 Subject: [PATCH 22/44] Leave definitions of __host__ and __device__ This prevents CCCL/thrust's build breakage because of v2.4.0 changes --- thrust/detail/config/config.h | 2 ++ thrust/detail/config/forceinline.h | 41 +++++++++++++++++++++++++++ thrust/detail/config/host_device.h | 45 ++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 thrust/detail/config/forceinline.h create mode 100644 thrust/detail/config/host_device.h diff --git a/thrust/detail/config/config.h b/thrust/detail/config/config.h index 95285631c..7e04d5a28 100644 --- a/thrust/detail/config/config.h +++ b/thrust/detail/config/config.h @@ -40,5 +40,7 @@ #include #include #include +#include +#include #include #include diff --git a/thrust/detail/config/forceinline.h b/thrust/detail/config/forceinline.h new file mode 100644 index 000000000..5ce645b36 --- /dev/null 
+++ b/thrust/detail/config/forceinline.h @@ -0,0 +1,41 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file forceinline.h + * \brief Defines __thrust_forceinline__ + */ + +#pragma once + +// Internal config header that is only included through thrust/detail/config/config.h + + +#if defined(__CUDACC__) || defined(_NVHPC_CUDA) + +#define __thrust_forceinline__ __forceinline__ + +#elif THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_HIP + +#define __thrust_forceinline__ __forceinline__ + +#else + +// TODO add + +#define __thrust_forceinline__ + +#endif diff --git a/thrust/detail/config/host_device.h b/thrust/detail/config/host_device.h new file mode 100644 index 000000000..de33aefb6 --- /dev/null +++ b/thrust/detail/config/host_device.h @@ -0,0 +1,45 @@ +/* + * Copyright 2008-2013 NVIDIA Corporation + * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file host_device.h + * \brief Defines __host__ and __device__ + */ + +#pragma once + +#include + +// since nvcc defines __host__ and __device__ for us, +// and only nvcc knows what to do with __host__ and __device__, +// define them to be the empty string for other compilers + +#if (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC) && (THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_HIP) + +// since __host__ & __device__ might have already been defined, only +// #define them if not defined already +// XXX this will break if the client does #include <host_defines.h> later + +#ifndef __host__ +#define __host__ +#endif // __host__ + +#ifndef __device__ +#define __device__ +#endif // __device__ + +#endif + From 6791366c92e87ba5dfb376fb296daf56466886d4 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Tue, 30 Jul 2024 13:59:38 +0000 Subject: [PATCH 23/44] Patched up CI because of CCCL2.4.0 tests' build failure --- .gitlab-ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 777fc576b..e971bff8c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -349,11 +349,13 @@ build:cuda-and-omp: - rm -R $CCCL_DIR/thrust/thrust - cp -r $CI_PROJECT_DIR/thrust $CCCL_DIR/thrust # Build tests and examples from CCCL Thrust + # CCCL 2.4.0 breaks compilation of tests. Compile examples only until we + # match v2.5.0.
- cmake -G Ninja -D CMAKE_BUILD_TYPE=Release -D CMAKE_CUDA_ARCHITECTURES="$GPU_TARGETS" - -D THRUST_ENABLE_TESTING=ON + -D THRUST_ENABLE_TESTING=OFF -D THRUST_ENABLE_EXAMPLES=ON -D THRUST_ENABLE_BENCHMARKS=OFF -D THRUST_ENABLE_MULTICONFIG=ON From 9fe0b042ecfc102c63daf6671fe9614750c3ebf5 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 31 Jul 2024 08:28:15 +0000 Subject: [PATCH 24/44] Updated tests and examples for __host__ __device__ use --- examples/arbitrary_transformation.cu | 2 + examples/bounding_box.cu | 2 + examples/bucket_sort2d.cu | 2 + examples/cuda/range_view.cu | 1 + examples/discrete_voronoi.cu | 1 + examples/dot_products_with_zip.cu | 1 + examples/include/host_device.h | 30 ++++++ examples/lambda.cu | 2 + examples/max_abs_diff.cu | 2 + examples/minmax.cu | 2 + examples/monte_carlo.cu | 2 + examples/monte_carlo_disjoint_sequences.cu | 4 +- examples/norm.cu | 6 +- examples/padded_grid_reduction.cu | 2 + examples/raw_reference_cast.cu | 2 + examples/remove_points2d.cu | 2 + examples/repeated_range.cu | 2 + examples/saxpy.cu | 2 + examples/scan_by_key.cu | 2 + examples/scan_matrix_by_rows.cu | 1 + examples/simple_moving_average.cu | 2 + examples/sort.cu | 2 + examples/sorting_aos_vs_soa.cu | 1 + examples/stream_compaction.cu | 2 + examples/strided_range.cu | 2 + examples/sum_rows.cu | 2 + examples/summary_statistics.cu | 2 + examples/summed_area_table.cu | 2 + examples/tiled_range.cu | 2 + examples/transform_input_output_iterator.cu | 2 + examples/transform_iterator.cu | 2 + examples/transform_output_iterator.cu | 3 +- examples/uninitialized_vector.cu | 2 + examples/word_count.cu | 2 + testing/allocator.cu | 22 ++--- testing/async/exclusive_scan/large_indices.cu | 8 +- .../async/exclusive_scan/stateful_operator.cu | 2 +- testing/async/inclusive_scan/large_indices.cu | 8 +- .../async/inclusive_scan/stateful_operator.cu | 2 +- testing/async_copy.cu | 16 +-- testing/async_for_each.cu | 6 +- testing/async_reduce.cu | 22 ++--- 
testing/async_reduce_into.cu | 8 +- testing/async_sort.cu | 12 +-- testing/async_transform.cu | 14 +-- testing/complex.cu | 21 +++- testing/complex_transform.cu | 97 ++++++++----------- testing/copy.cu | 26 ++--- testing/count.cu | 2 +- testing/cpp/adjacent_difference.cu | 10 +- testing/cuda/adjacent_difference.cu | 10 +- testing/cuda/copy_if.cu | 16 +-- testing/cuda/count.cu | 5 +- testing/cuda/device_side_universal_vector.cu | 2 +- testing/cuda/find.cu | 18 ++-- testing/cuda/for_each.cu | 23 +++-- testing/cuda/gather.cu | 5 +- testing/cuda/generate.cu | 16 +-- testing/cuda/is_partitioned.cu | 2 +- testing/cuda/merge_sort.cu | 5 +- testing/cuda/pair_sort.cu | 5 +- testing/cuda/pair_sort_by_key.cu | 5 +- testing/cuda/partition.cu | 10 +- testing/cuda/partition_point.cu | 6 +- testing/cuda/reduce_by_key.cu | 20 ++-- testing/cuda/remove.cu | 12 ++- testing/cuda/replace.cu | 5 +- testing/cuda/scan.cu | 9 +- testing/cuda/scatter.cu | 5 +- testing/cuda/sort.cu | 3 +- testing/cuda/sort_by_key.cu | 2 +- testing/cuda/unique.cu | 6 +- testing/cuda/unique_by_key.cu | 6 +- testing/device_delete.cu | 4 +- testing/docs/doxybook_test.h | 15 +-- testing/equal.cu | 2 +- testing/event.cu | 12 +-- testing/fill.cu | 8 +- testing/find.cu | 10 +- testing/for_each.cu | 14 +-- testing/functional_placeholders_arithmetic.cu | 2 +- testing/functional_placeholders_bitwise.cu | 2 +- ...tional_placeholders_compound_assignment.cu | 28 +++--- .../functional_placeholders_miscellaneous.cu | 6 +- testing/future.cu | 12 +-- testing/gather.cu | 2 +- testing/generate.cu | 10 +- testing/generate_const_iterators.cu | 2 +- testing/inner_product.cu | 2 +- testing/is_contiguous_iterator.cu | 4 +- testing/is_operator_function_object.cu | 12 +-- testing/is_partitioned.cu | 10 +- testing/is_sorted_until.cu | 4 +- testing/logical.cu | 12 +-- testing/mr_disjoint_pool.cu | 2 +- testing/mr_pool.cu | 18 ++-- testing/out_of_memory_recovery.cu | 4 +- testing/pair_reduce.cu | 8 +- testing/pair_scan.cu | 8 +- 
testing/pair_scan_by_key.cu | 8 +- testing/pair_sort.cu | 5 +- testing/pair_sort_by_key.cu | 5 +- testing/pair_transform.cu | 8 +- testing/partition.cu | 36 +++---- testing/partition_point.cu | 10 +- testing/random.cu | 45 +++------ testing/reduce.cu | 6 +- testing/reduce_by_key.cu | 2 +- ...vbug_2318871__zip_iterator_with_complex.cu | 3 +- ...g_1940974__merge_with_constant_iterator.cu | 2 +- ...__scan_requires_assignability_from_zero.cu | 2 +- ...requires_assignability_from_zero.fixed0.cu | 2 +- ...requires_assignability_from_zero.fixed1.cu | 2 +- testing/remove.cu | 30 +++--- testing/replace.cu | 26 ++--- testing/scan.cu | 24 ++--- testing/scan_by_key.exclusive.cu | 2 +- testing/scan_by_key.inclusive.cu | 2 +- testing/scatter.cu | 10 +- testing/sequence.cu | 12 +-- testing/sort_permutation_iterator.cu | 2 +- testing/stable_sort.cu | 4 +- testing/stable_sort_by_key.cu | 2 +- testing/stable_sort_by_key_large_values.cu | 2 +- testing/swap_ranges.cu | 30 +++--- testing/transform.cu | 26 ++--- testing/transform_reduce.cu | 4 +- testing/transform_scan.cu | 10 +- testing/tuple.cu | 6 +- testing/tuple_algorithms.cu | 6 +- testing/tuple_reduce.cu | 4 +- testing/tuple_scan.cu | 4 +- testing/tuple_sort.cu | 4 +- testing/tuple_transform.cu | 4 +- testing/uninitialized_copy.cu | 6 +- testing/uninitialized_fill.cu | 6 +- testing/unique.cu | 2 +- testing/unique_by_key.cu | 4 +- testing/unittest/runtime_static_assert.h | 10 +- testing/unittest/special_types.h | 28 +++--- testing/unittest/testframework.h | 26 ++--- testing/unittest/util_async.h | 11 +-- testing/unittest_static_assert.cu | 2 +- testing/vector.cu | 2 +- testing/zip_function.cu | 4 +- testing/zip_iterator.cu | 14 ++- testing/zip_iterator_reduce.cu | 2 +- testing/zip_iterator_reduce_by_key.cu | 3 +- testing/zip_iterator_scan.cu | 2 +- thrust/complex.h | 45 +++------ thrust/detail/config/execution_space.h | 4 + 151 files changed, 684 insertions(+), 620 deletions(-) create mode 100644 examples/include/host_device.h 
diff --git a/examples/arbitrary_transformation.cu b/examples/arbitrary_transformation.cu index be22c2e5a..1453c45dd 100644 --- a/examples/arbitrary_transformation.cu +++ b/examples/arbitrary_transformation.cu @@ -9,6 +9,8 @@ #include #endif // >= C++11 +#include "include/host_device.h" + // This example shows how to implement an arbitrary transformation of // the form output[i] = F(first[i], second[i], third[i], ... ). // In this example, we use a function with 3 inputs and 1 output. diff --git a/examples/bounding_box.cu b/examples/bounding_box.cu index cca71a45e..13228052e 100644 --- a/examples/bounding_box.cu +++ b/examples/bounding_box.cu @@ -4,6 +4,8 @@ #include #include +#include "include/host_device.h" + // This example shows how to compute a bounding box // for a set of points in two dimensions. diff --git a/examples/bucket_sort2d.cu b/examples/bucket_sort2d.cu index 9e3bb2720..d5c0ef57d 100644 --- a/examples/bucket_sort2d.cu +++ b/examples/bucket_sort2d.cu @@ -9,6 +9,8 @@ #include #include +#include "include/host_device.h" + // define a 2d float vector typedef thrust::tuple vec2; diff --git a/examples/cuda/range_view.cu b/examples/cuda/range_view.cu index 2ede62047..e26051668 100644 --- a/examples/cuda/range_view.cu +++ b/examples/cuda/range_view.cu @@ -4,6 +4,7 @@ #include #include +#include "../include/host_device.h" // This example demonstrates the use of a view: a non-owning wrapper for an // iterator range which presents a container-like interface to the user. 
diff --git a/examples/discrete_voronoi.cu b/examples/discrete_voronoi.cu index bfbf2242d..5555ac706 100644 --- a/examples/discrete_voronoi.cu +++ b/examples/discrete_voronoi.cu @@ -10,6 +10,7 @@ #include #include +#include "include/host_device.h" #include "include/timer.h" // Compute an approximate Voronoi Diagram with a Jump Flooding Algorithm (JFA) diff --git a/examples/dot_products_with_zip.cu b/examples/dot_products_with_zip.cu index 81ff7ac12..2628eaa10 100644 --- a/examples/dot_products_with_zip.cu +++ b/examples/dot_products_with_zip.cu @@ -5,6 +5,7 @@ #include #include +#include "include/host_device.h" // This example shows how thrust::zip_iterator can be used to create a // 'virtual' array of structures. In this case the structure is a 3d diff --git a/examples/include/host_device.h b/examples/include/host_device.h new file mode 100644 index 000000000..f7a4a9547 --- /dev/null +++ b/examples/include/host_device.h @@ -0,0 +1,30 @@ +/* + * Copyright 2008-2009 NVIDIA Corporation + * Modifications Copyright© 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC + +# ifndef __host__ +# define __host__ +# endif + +# ifndef __device__ +# define __device__ +# endif + +#endif diff --git a/examples/lambda.cu b/examples/lambda.cu index 65b75f627..febe44b54 100644 --- a/examples/lambda.cu +++ b/examples/lambda.cu @@ -3,6 +3,8 @@ #include #include +#include "include/host_device.h" + // This example demonstrates the use of placeholders to implement // the SAXPY operation (i.e. Y[i] = a * X[i] + Y[i]). // diff --git a/examples/max_abs_diff.cu b/examples/max_abs_diff.cu index c9ae4d337..0e379fb56 100644 --- a/examples/max_abs_diff.cu +++ b/examples/max_abs_diff.cu @@ -5,6 +5,8 @@ #include #include +#include "include/host_device.h" + // this example computes the maximum absolute difference // between the elements of two vectors diff --git a/examples/minmax.cu b/examples/minmax.cu index 3b4a53881..04b1cbf00 100644 --- a/examples/minmax.cu +++ b/examples/minmax.cu @@ -6,6 +6,8 @@ #include +#include "include/host_device.h" + // compute minimum and maximum values in a single reduction // minmax_pair stores the minimum and maximum diff --git a/examples/monte_carlo.cu b/examples/monte_carlo.cu index 4a11c4de8..ae750e616 100644 --- a/examples/monte_carlo.cu +++ b/examples/monte_carlo.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // we could vary M & N to find the perf sweet spot __host__ __device__ diff --git a/examples/monte_carlo_disjoint_sequences.cu b/examples/monte_carlo_disjoint_sequences.cu index 77b0d0086..3ed2daf2f 100644 --- a/examples/monte_carlo_disjoint_sequences.cu +++ b/examples/monte_carlo_disjoint_sequences.cu @@ -6,7 +6,9 @@ #include #include -// The technique demonstrated in the example monte_carlo.cu +#include "include/host_device.h" + +// The technique demonstrated in the example monte_carlo.cu // assigns an independently seeded random number generator to each // of 30K threads, and uses a hashing scheme based on 
thread index to // seed each RNG. This technique, while simple, may be succeptible diff --git a/examples/norm.cu b/examples/norm.cu index 0892baaf9..4f663a260 100644 --- a/examples/norm.cu +++ b/examples/norm.cu @@ -5,8 +5,10 @@ #include #include -// This example computes the norm [1] of a vector. The norm is -// computed by squaring all numbers in the vector, summing the +#include "include/host_device.h" + +// This example computes the norm [1] of a vector. The norm is +// computed by squaring all numbers in the vector, summing the // squares, and taking the square root of the sum of squares. In // Thrust this operation is efficiently implemented with the // transform_reduce() algorith. Specifically, we first transform diff --git a/examples/padded_grid_reduction.cu b/examples/padded_grid_reduction.cu index 2467debca..3a5e68e9e 100644 --- a/examples/padded_grid_reduction.cu +++ b/examples/padded_grid_reduction.cu @@ -10,6 +10,8 @@ #include #include +#include "include/host_device.h" + // This example computes the minimum and maximum values // over a padded grid. The padded values are not considered // during the reduction operation. diff --git a/examples/raw_reference_cast.cu b/examples/raw_reference_cast.cu index d6c854590..335476d5c 100644 --- a/examples/raw_reference_cast.cu +++ b/examples/raw_reference_cast.cu @@ -4,6 +4,8 @@ #include #include +#include "include/host_device.h" + // This example illustrates how to use the raw_reference_cast to convert // system-specific reference wrappers into native references. 
// diff --git a/examples/remove_points2d.cu b/examples/remove_points2d.cu index 0bca500a6..5dc5e14dc 100644 --- a/examples/remove_points2d.cu +++ b/examples/remove_points2d.cu @@ -2,6 +2,8 @@ #include #include +#include "include/host_device.h" + // This example generates random points in the // unit square [0,1)x[0,1) and then removes all // points where x^2 + y^2 > 1 diff --git a/examples/repeated_range.cu b/examples/repeated_range.cu index a309b80a6..73557ff09 100644 --- a/examples/repeated_range.cu +++ b/examples/repeated_range.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // this example illustrates how to make repeated access to a range of values // examples: // repeated_range([0, 1, 2, 3], 1) -> [0, 1, 2, 3] diff --git a/examples/saxpy.cu b/examples/saxpy.cu index bbc6b6156..3224fac5f 100644 --- a/examples/saxpy.cu +++ b/examples/saxpy.cu @@ -6,6 +6,8 @@ #include #include +#include "include/host_device.h" + // This example illustrates how to implement the SAXPY // operation (Y[i] = a * X[i] + Y[i]) using Thrust. // The saxpy_slow function demonstrates the most diff --git a/examples/scan_by_key.cu b/examples/scan_by_key.cu index 75428b194..7582d20d0 100644 --- a/examples/scan_by_key.cu +++ b/examples/scan_by_key.cu @@ -3,6 +3,8 @@ #include #include +#include "include/host_device.h" + // BinaryPredicate for the head flag segment representation // equivalent to thrust::not2(thrust::project2nd())); template diff --git a/examples/scan_matrix_by_rows.cu b/examples/scan_matrix_by_rows.cu index 2cf1986e9..4fd152ea6 100644 --- a/examples/scan_matrix_by_rows.cu +++ b/examples/scan_matrix_by_rows.cu @@ -4,6 +4,7 @@ #include #include +#include "include/host_device.h" #include // We have a matrix stored in a `thrust::device_vector`. 
We want to perform a diff --git a/examples/simple_moving_average.cu b/examples/simple_moving_average.cu index 523e8fb37..199a5e786 100644 --- a/examples/simple_moving_average.cu +++ b/examples/simple_moving_average.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // Efficiently computes the simple moving average (SMA) [1] of a data series // using a parallel prefix-sum or "scan" operation. // diff --git a/examples/sort.cu b/examples/sort.cu index 1bbb5d897..cb40d58b4 100644 --- a/examples/sort.cu +++ b/examples/sort.cu @@ -4,6 +4,8 @@ #include #include +#include "include/host_device.h" + // Helper routines void initialize(thrust::device_vector& v) diff --git a/examples/sorting_aos_vs_soa.cu b/examples/sorting_aos_vs_soa.cu index 649a78ab1..5b7976968 100644 --- a/examples/sorting_aos_vs_soa.cu +++ b/examples/sorting_aos_vs_soa.cu @@ -4,6 +4,7 @@ #include #include +#include "include/host_device.h" #include "include/timer.h" // This examples compares sorting performance using Array of Structures (AoS) diff --git a/examples/stream_compaction.cu b/examples/stream_compaction.cu index 95316b06a..11c7b7db4 100644 --- a/examples/stream_compaction.cu +++ b/examples/stream_compaction.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // this functor returns true if the argument is odd, and false otherwise template struct is_odd : public thrust::unary_function diff --git a/examples/strided_range.cu b/examples/strided_range.cu index 3457bc1ca..b719d40cd 100644 --- a/examples/strided_range.cu +++ b/examples/strided_range.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // this example illustrates how to make strided access to a range of values // examples: // strided_range([0, 1, 2, 3, 4, 5, 6], 1) -> [0, 1, 2, 3, 4, 5, 6] diff --git a/examples/sum_rows.cu b/examples/sum_rows.cu index 4d8a2e11f..bb3a175c5 100644 --- a/examples/sum_rows.cu +++ b/examples/sum_rows.cu @@ -6,6 +6,8 @@ #include #include +#include 
"include/host_device.h" + // convert a linear index to a row index template struct linear_index_to_row_index : public thrust::unary_function diff --git a/examples/summary_statistics.cu b/examples/summary_statistics.cu index 38785e2b7..c685beff6 100644 --- a/examples/summary_statistics.cu +++ b/examples/summary_statistics.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // This example computes several statistical properties of a data // series in a single reduction. The algorithm is described in detail here: // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm diff --git a/examples/summed_area_table.cu b/examples/summed_area_table.cu index d962df25b..6a865cae1 100644 --- a/examples/summed_area_table.cu +++ b/examples/summed_area_table.cu @@ -8,6 +8,8 @@ #include #include +#include "include/host_device.h" + // This example computes a summed area table using segmented scan // http://en.wikipedia.org/wiki/Summed_area_table diff --git a/examples/tiled_range.cu b/examples/tiled_range.cu index 51cc27d5f..157787620 100644 --- a/examples/tiled_range.cu +++ b/examples/tiled_range.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // this example illustrates how to tile a range multiple times // examples: // tiled_range([0, 1, 2, 3], 1) -> [0, 1, 2, 3] diff --git a/examples/transform_input_output_iterator.cu b/examples/transform_input_output_iterator.cu index afdccc35a..8a725862c 100644 --- a/examples/transform_input_output_iterator.cu +++ b/examples/transform_input_output_iterator.cu @@ -6,6 +6,8 @@ #include #include +#include "include/host_device.h" + // Base 2 fixed point class ScaledInteger { diff --git a/examples/transform_iterator.cu b/examples/transform_iterator.cu index 1da8f1e13..c2b4408c7 100644 --- a/examples/transform_iterator.cu +++ b/examples/transform_iterator.cu @@ -7,6 +7,8 @@ #include #include +#include "include/host_device.h" + // this functor clamps a value to the range [lo, 
hi] template struct clamp : public thrust::unary_function diff --git a/examples/transform_output_iterator.cu b/examples/transform_output_iterator.cu index 1c5a05e06..45017b8cc 100644 --- a/examples/transform_output_iterator.cu +++ b/examples/transform_output_iterator.cu @@ -4,7 +4,8 @@ #include #include -struct Functor +#include "include/host_device.h" +struct Functor { template __host__ __device__ diff --git a/examples/uninitialized_vector.cu b/examples/uninitialized_vector.cu index 90e8141fa..a397cf777 100644 --- a/examples/uninitialized_vector.cu +++ b/examples/uninitialized_vector.cu @@ -10,6 +10,8 @@ #include #include +#include "include/host_device.h" + // uninitialized_allocator is an allocator which // derives from device_allocator and which has a // no-op construct member function diff --git a/examples/word_count.cu b/examples/word_count.cu index 4836c8600..63663eb5d 100644 --- a/examples/word_count.cu +++ b/examples/word_count.cu @@ -5,6 +5,8 @@ #include +#include "include/host_device.h" + // This example computes the number of words in a text sample // with a single call to thrust::inner_product. The algorithm // counts the number of characters which start a new word, i.e. 
diff --git a/testing/allocator.cu b/testing/allocator.cu index 291f353a7..f07b404af 100644 --- a/testing/allocator.cu +++ b/testing/allocator.cu @@ -28,11 +28,11 @@ template struct my_allocator_with_custom_construct1 : thrust::device_malloc_allocator { - __host__ __device__ + THRUST_HOST_DEVICE my_allocator_with_custom_construct1() {} - __host__ __device__ + THRUST_HOST_DEVICE void construct(T *p) { *p = 13; @@ -53,12 +53,12 @@ template struct my_allocator_with_custom_construct2 : thrust::device_malloc_allocator { - __host__ __device__ + THRUST_HOST_DEVICE my_allocator_with_custom_construct2() {} template - __host__ __device__ + THRUST_HOST_DEVICE void construct(T *p, const Arg &) { *p = 13; @@ -89,18 +89,18 @@ struct my_allocator_with_custom_destroy static bool g_state; - __host__ + THRUST_HOST my_allocator_with_custom_destroy(){} - __host__ + THRUST_HOST my_allocator_with_custom_destroy(const my_allocator_with_custom_destroy &other) : use_me_to_alloc(other.use_me_to_alloc) {} - __host__ + THRUST_HOST ~my_allocator_with_custom_destroy(){} - __host__ __device__ + THRUST_HOST_DEVICE void destroy(T *) { NV_IF_TARGET(NV_IS_HOST, (g_state = true;)); @@ -161,15 +161,15 @@ struct my_minimal_allocator typedef T & reference; typedef const T & const_reference; - __host__ + THRUST_HOST my_minimal_allocator(){} - __host__ + THRUST_HOST my_minimal_allocator(const my_minimal_allocator &other) : use_me_to_alloc(other.use_me_to_alloc) {} - __host__ + THRUST_HOST ~my_minimal_allocator(){} value_type *allocate(std::ptrdiff_t n) diff --git a/testing/async/exclusive_scan/large_indices.cu b/testing/async/exclusive_scan/large_indices.cu index 9e821bc16..e92bb510e 100644 --- a/testing/async/exclusive_scan/large_indices.cu +++ b/testing/async/exclusive_scan/large_indices.cu @@ -68,18 +68,18 @@ struct assert_sequence_iterator unexpected_value = nullptr; } - __host__ __device__ assert_sequence_iterator operator+(difference_type i) const + THRUST_HOST_DEVICE assert_sequence_iterator 
operator+(difference_type i) const { return clone(expected + i); } - __host__ __device__ reference operator[](difference_type i) const + THRUST_HOST_DEVICE reference operator[](difference_type i) const { return clone(expected + i); } // Some weirdness, this iterator acts like its own reference - __device__ assert_sequence_iterator operator=(value_type val) + THRUST_DEVICE assert_sequence_iterator operator=(value_type val) { if (val != expected) { @@ -95,7 +95,7 @@ struct assert_sequence_iterator } private: - __host__ __device__ + THRUST_HOST_DEVICE assert_sequence_iterator clone(value_type new_expected) const { return {new_expected, max, found_max, unexpected_value}; diff --git a/testing/async/exclusive_scan/stateful_operator.cu b/testing/async/exclusive_scan/stateful_operator.cu index 411ffbd99..c2bd3dfd7 100644 --- a/testing/async/exclusive_scan/stateful_operator.cu +++ b/testing/async/exclusive_scan/stateful_operator.cu @@ -15,7 +15,7 @@ struct stateful_operator { T offset; - __host__ __device__ T operator()(T v1, T v2) { return v1 + v2 + offset; } + THRUST_HOST_DEVICE T operator()(T v1, T v2) { return v1 + v2 + offset; } }; // Postfix args overload definition that uses a stateful custom binary operator diff --git a/testing/async/inclusive_scan/large_indices.cu b/testing/async/inclusive_scan/large_indices.cu index b8666aa80..8bc6d9949 100644 --- a/testing/async/inclusive_scan/large_indices.cu +++ b/testing/async/inclusive_scan/large_indices.cu @@ -67,18 +67,18 @@ struct assert_sequence_iterator unexpected_value = nullptr; } - __host__ __device__ assert_sequence_iterator operator+(difference_type i) const + THRUST_HOST_DEVICE assert_sequence_iterator operator+(difference_type i) const { return clone(expected + i); } - __host__ __device__ reference operator[](difference_type i) const + THRUST_HOST_DEVICE reference operator[](difference_type i) const { return clone(expected + i); } // Some weirdness, this iterator acts like its own reference - __device__ 
assert_sequence_iterator operator=(value_type val) + THRUST_DEVICE assert_sequence_iterator operator=(value_type val) { if (val != expected) { @@ -95,7 +95,7 @@ struct assert_sequence_iterator } private: - __host__ __device__ assert_sequence_iterator + THRUST_HOST_DEVICE assert_sequence_iterator clone(value_type new_expected) const { return {new_expected, max, found_max, unexpected_value}; diff --git a/testing/async/inclusive_scan/stateful_operator.cu b/testing/async/inclusive_scan/stateful_operator.cu index 224c29303..0910a3ef9 100644 --- a/testing/async/inclusive_scan/stateful_operator.cu +++ b/testing/async/inclusive_scan/stateful_operator.cu @@ -15,7 +15,7 @@ struct stateful_operator { T offset; - __host__ __device__ T operator()(T v1, T v2) { return v1 + v2 + offset; } + THRUST_HOST_DEVICE T operator()(T v1, T v2) { return v1 + v2 + offset; } }; // Postfix args overload definition that uses a stateful custom binary operator diff --git a/testing/async_copy.cu b/testing/async_copy.cu index 2666a6c38..e9a300e8f 100644 --- a/testing/async_copy.cu +++ b/testing/async_copy.cu @@ -14,7 +14,7 @@ struct THRUST_PP_CAT2(name, _fn) \ { \ template \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last, OutputIt&& output \ ) const \ @@ -62,7 +62,7 @@ struct test_async_copy_host_to_device template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -97,7 +97,7 @@ struct test_async_copy_device_to_host template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -138,7 +138,7 @@ struct test_async_copy_device_to_device template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -185,7 +185,7 @@ struct test_async_copy_counting_iterator_input_to_device_vector template struct tester { - __host__ + THRUST_HOST void 
operator()(std::size_t n) { thrust::counting_iterator first(0); @@ -247,7 +247,7 @@ struct test_async_copy_counting_iterator_input_to_host_vector template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::counting_iterator first(0); @@ -295,7 +295,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( template struct test_async_copy_roundtrip { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -327,7 +327,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME( template struct test_async_copy_after { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); diff --git a/testing/async_for_each.cu b/testing/async_for_each.cu index a09adf255..c6d862f02 100644 --- a/testing/async_for_each.cu +++ b/testing/async_for_each.cu @@ -12,7 +12,7 @@ struct THRUST_PP_CAT2(name, _fn) \ { \ template \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last, UnaryFunction&& f \ ) const \ @@ -41,7 +41,7 @@ DEFINE_ASYNC_FOR_EACH_CALLABLE( struct inplace_divide_by_2 { template - __host__ __device__ + THRUST_HOST_DEVICE void operator()(T& x) const { x /= 2; @@ -56,7 +56,7 @@ struct test_async_for_each template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0_data(unittest::random_integers(n)); diff --git a/testing/async_reduce.cu b/testing/async_reduce.cu index bfc9f40b6..d6e53943a 100644 --- a/testing/async_reduce.cu +++ b/testing/async_reduce.cu @@ -15,7 +15,7 @@ template struct custom_plus { - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T lhs, T rhs) const { return lhs + rhs; @@ -44,7 +44,7 @@ struct custom_plus template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -72,7 +72,7 @@ struct custom_plus template < \ typename ForwardIt, typename Sentinel \ > \ - 
__host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -349,7 +349,7 @@ struct test_async_reduce template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -603,7 +603,7 @@ struct test_async_reduce_counting_iterator template struct tester { - __host__ + THRUST_HOST void operator()() { constexpr std::size_t n = 15 * sizeof(T); @@ -727,7 +727,7 @@ DECLARE_GENERIC_UNITTEST_WITH_TYPES_AND_NAME( template struct test_async_reduce_using { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -772,7 +772,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( template struct test_async_reduce_after { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -840,7 +840,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( template struct test_async_reduce_on_then_after { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -915,7 +915,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( template struct test_async_reduce_allocator_on_then_after { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -1001,7 +1001,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( template struct test_async_reduce_caching { - __host__ + THRUST_HOST void operator()(std::size_t n) { constexpr std::int64_t m = 32; @@ -1051,7 +1051,7 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES( template struct test_async_copy_then_reduce { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0a(unittest::random_integers(n)); diff --git a/testing/async_reduce_into.cu b/testing/async_reduce_into.cu index 71c04b770..ae660344e 100644 --- a/testing/async_reduce_into.cu +++ b/testing/async_reduce_into.cu @@ -16,7 +16,7 @@ template struct custom_plus { - __host__ 
__device__ + THRUST_HOST_DEVICE T operator()(T lhs, T rhs) const { return lhs + rhs; @@ -45,7 +45,7 @@ struct custom_plus template < \ typename ForwardIt, typename Sentinel, typename OutputIt \ > \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last, OutputIt&& output \ ) \ @@ -73,7 +73,7 @@ struct custom_plus template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -370,7 +370,7 @@ struct test_async_reduce_into template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); diff --git a/testing/async_sort.cu b/testing/async_sort.cu index c5cfeae23..fb492941e 100644 --- a/testing/async_sort.cu +++ b/testing/async_sort.cu @@ -24,7 +24,7 @@ enum wait_policy template struct custom_greater { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T rhs, T lhs) const { return lhs > rhs; @@ -38,7 +38,7 @@ struct custom_greater template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ static void sync( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -51,7 +51,7 @@ struct custom_greater template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ static auto async( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -79,7 +79,7 @@ DEFINE_SORT_INVOKER( template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ static void sync( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -92,7 +92,7 @@ DEFINE_SORT_INVOKER( template < \ typename ForwardIt, typename Sentinel \ > \ - __host__ \ + THRUST_HOST \ static auto async( \ ForwardIt&& first, Sentinel&& last \ ) \ @@ -138,7 +138,7 @@ struct test_async_sort template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0_data(unittest::random_integers(n)); diff --git a/testing/async_transform.cu b/testing/async_transform.cu index 
cc4466ca2..36236ed7a 100644 --- a/testing/async_transform.cu +++ b/testing/async_transform.cu @@ -13,7 +13,7 @@ template struct divide_by_2 { - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T x) const { return x / 2; @@ -43,7 +43,7 @@ struct divide_by_2 typename ForwardIt, typename Sentinel, typename OutputIt \ , typename UnaryOperation \ > \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last, OutputIt&& output \ , UnaryOperation&& op \ @@ -73,7 +73,7 @@ struct divide_by_2 typename ForwardIt, typename Sentinel, typename OutputIt \ , typename UnaryOperation \ > \ - __host__ \ + THRUST_HOST \ auto operator()( \ ForwardIt&& first, Sentinel&& last, OutputIt&& output \ , UnaryOperation&& op \ @@ -166,7 +166,7 @@ struct test_async_transform_unary template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -288,7 +288,7 @@ struct test_async_transform_unary_inplace template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); @@ -398,7 +398,7 @@ struct test_async_transform_unary_counting_iterator template struct tester { - __host__ + THRUST_HOST void operator()() { constexpr std::size_t n = 15 * sizeof(T); @@ -475,7 +475,7 @@ struct test_async_transform_using template struct tester { - __host__ + THRUST_HOST void operator()(std::size_t n) { thrust::host_vector h0(unittest::random_integers(n)); diff --git a/testing/complex.cu b/testing/complex.cu index 89fe42fdc..6bc41a187 100644 --- a/testing/complex.cu +++ b/testing/complex.cu @@ -711,6 +711,23 @@ struct TestComplexStdComplexDeviceInterop ASSERT_ALMOST_EQUAL(vec[2].imag(), thrust::complex(device_vec[2]).imag()); } }; -SimpleUnitTest - TestComplexStdComplexDeviceInteropInstance; +SimpleUnitTest TestComplexStdComplexDeviceInteropInstance; #endif + +template +struct TestComplexExplicitConstruction +{ + struct user_complex + { + 
THRUST_HOST_DEVICE user_complex(T, T) {} + THRUST_HOST_DEVICE user_complex(const thrust::complex&) {} + }; + + void operator()() + { + const thrust::complex input(42.0, 1337.0); + const user_complex result = thrust::exp(input); + (void) result; + } +}; +SimpleUnitTest TestComplexExplicitConstructionInstance; diff --git a/testing/complex_transform.cu b/testing/complex_transform.cu index 439597a0d..9d531108b 100644 --- a/testing/complex_transform.cu +++ b/testing/complex_transform.cu @@ -10,10 +10,8 @@ struct basic_arithmetic_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x, - const thrust::complex &y) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x, const thrust::complex& y) { // exercise unary and binary arithmetic operators // Should return approximately 1 @@ -23,9 +21,8 @@ struct basic_arithmetic_functor struct complex_plane_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { // Should return a proximately 1 return thrust::proj( (thrust::polar(abs(x),arg(x)) * conj(x))/norm(x)); @@ -34,10 +31,8 @@ struct complex_plane_functor struct pow_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x, - const thrust::complex &y) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x, const thrust::complex& y) { // exercise power functions return pow(x,y); @@ -46,9 +41,8 @@ struct pow_functor struct sqrt_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { // exercise power functions return sqrt(x); @@ -57,9 +51,8 @@ struct sqrt_functor struct log_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + 
THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return log(x); } // end operator()() @@ -67,9 +60,8 @@ struct log_functor struct exp_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return exp(x); } // end operator()() @@ -77,9 +69,8 @@ struct exp_functor struct log10_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return log10(x); } // end operator()() @@ -88,9 +79,8 @@ struct log10_functor struct cos_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return cos(x); } @@ -98,9 +88,8 @@ struct cos_functor struct sin_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return sin(x); } @@ -108,9 +97,8 @@ struct sin_functor struct tan_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return tan(x); } @@ -120,9 +108,8 @@ struct tan_functor struct cosh_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return cosh(x); } @@ -130,9 +117,8 @@ struct cosh_functor struct sinh_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return sinh(x); } @@ -140,9 +126,8 @@ struct sinh_functor struct tanh_functor { - template - 
__host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return tanh(x); } @@ -151,9 +136,8 @@ struct tanh_functor struct acos_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return acos(x); } @@ -161,9 +145,8 @@ struct acos_functor struct asin_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return asin(x); } @@ -171,9 +154,8 @@ struct asin_functor struct atan_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return atan(x); } @@ -182,9 +164,8 @@ struct atan_functor struct acosh_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return acosh(x); } @@ -192,9 +173,8 @@ struct acosh_functor struct asinh_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return asinh(x); } @@ -202,9 +182,8 @@ struct asinh_functor struct atanh_functor { - template - __host__ __device__ - thrust::complex operator()(const thrust::complex &x) + template + THRUST_HOST_DEVICE thrust::complex operator()(const thrust::complex& x) { return atanh(x); } diff --git a/testing/copy.cu b/testing/copy.cu index 27c275419..07c8e8372 100644 --- a/testing/copy.cu +++ b/testing/copy.cu @@ -247,21 +247,21 @@ DECLARE_VECTOR_UNITTEST(TestCopyListTo); template struct is_even { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) { 
return (x & 1) == 0; } }; template struct is_true { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) { return x ? true : false; } }; template struct mod_3 { - __host__ __device__ + THRUST_HOST_DEVICE unsigned int operator()(T x) { return x % 3; } }; @@ -455,12 +455,12 @@ struct object_with_non_trivial_ctor int field; int magic; - __host__ __device__ object_with_non_trivial_ctor() + THRUST_HOST_DEVICE object_with_non_trivial_ctor() { magic = MAGIC; field = 0; } - __host__ __device__ object_with_non_trivial_ctor(int f) + THRUST_HOST_DEVICE object_with_non_trivial_ctor(int f) { magic = MAGIC; field = f; @@ -470,7 +470,7 @@ struct object_with_non_trivial_ctor // This non-trivial assignment requires that `this` points to initialized // memory - __host__ __device__ object_with_non_trivial_ctor& + THRUST_HOST_DEVICE object_with_non_trivial_ctor& operator=(const object_with_non_trivial_ctor& x) { // To really copy over x's field value, require we have magic value set. @@ -485,7 +485,7 @@ struct object_with_non_trivial_ctor struct always_true { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const object_with_non_trivial_ctor&) { return true; @@ -732,16 +732,16 @@ struct only_set_when_expected_it long long expected; bool * flag; - __host__ __device__ only_set_when_expected_it operator++() const { return *this; } - __host__ __device__ only_set_when_expected_it operator*() const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator++() const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator*() const { return *this; } template - __host__ __device__ only_set_when_expected_it operator+(Difference) const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator+(Difference) const { return *this; } template - __host__ __device__ only_set_when_expected_it operator+=(Difference) const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator+=(Difference) const { return *this; } 
template - __host__ __device__ only_set_when_expected_it operator[](Index) const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator[](Index) const { return *this; } - __device__ + THRUST_DEVICE void operator=(long long value) const { if (value == expected) diff --git a/testing/count.cu b/testing/count.cu index f5d4b7d49..25b8f3703 100644 --- a/testing/count.cu +++ b/testing/count.cu @@ -50,7 +50,7 @@ DECLARE_VARIABLE_UNITTEST(TestCount); template struct greater_than_five { - __host__ __device__ bool operator()(const T &x) const {return x > 5;} + THRUST_HOST_DEVICE bool operator()(const T &x) const {return x > 5;} }; template diff --git a/testing/cpp/adjacent_difference.cu b/testing/cpp/adjacent_difference.cu index 584899bec..6d269e17a 100644 --- a/testing/cpp/adjacent_difference.cu +++ b/testing/cpp/adjacent_difference.cu @@ -8,14 +8,14 @@ struct detect_wrong_difference { bool * flag; - __host__ __device__ detect_wrong_difference operator++() const { return *this; } - __host__ __device__ detect_wrong_difference operator*() const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator++() const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator*() const { return *this; } template - __host__ __device__ detect_wrong_difference operator+(Difference) const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator+(Difference) const { return *this; } template - __host__ __device__ detect_wrong_difference operator[](Index) const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator[](Index) const { return *this; } - __device__ + THRUST_DEVICE void operator=(long long difference) const { if (difference != 1) diff --git a/testing/cuda/adjacent_difference.cu b/testing/cuda/adjacent_difference.cu index 9b101ea2e..eeabcdb3e 100644 --- a/testing/cuda/adjacent_difference.cu +++ b/testing/cuda/adjacent_difference.cu @@ -108,14 +108,14 @@ struct detect_wrong_difference bool * flag; - 
__host__ __device__ detect_wrong_difference operator++() const { return *this; } - __host__ __device__ detect_wrong_difference operator*() const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator++() const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator*() const { return *this; } template - __host__ __device__ detect_wrong_difference operator+(Difference) const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator+(Difference) const { return *this; } template - __host__ __device__ detect_wrong_difference operator[](Index) const { return *this; } + THRUST_HOST_DEVICE detect_wrong_difference operator[](Index) const { return *this; } - __device__ + THRUST_DEVICE void operator=(long long difference) const { if (difference != 1) diff --git a/testing/cuda/copy_if.cu b/testing/cuda/copy_if.cu index 751eca2fc..bc7311bbe 100644 --- a/testing/cuda/copy_if.cu +++ b/testing/cuda/copy_if.cu @@ -7,23 +7,27 @@ template struct is_even { - __host__ __device__ - bool operator()(T x) { return (static_cast(x) & 1) == 0; } + THRUST_HOST_DEVICE bool operator()(T x) + { + return (static_cast(x) & 1) == 0; + } }; template struct mod_3 { - __host__ __device__ - unsigned int operator()(T x) { return static_cast(x) % 3; } + THRUST_HOST_DEVICE unsigned int operator()(T x) + { + return static_cast(x) % 3; + } }; template struct mod_n { T mod; - __host__ __device__ bool operator()(T x) + THRUST_HOST_DEVICE bool operator()(T x) { return (x % mod == 0) ? 
true : false; } @@ -33,7 +37,7 @@ template struct multiply_n { T multiplier; - __host__ __device__ T operator()(T x) + THRUST_HOST_DEVICE T operator()(T x) { return x * multiplier; } diff --git a/testing/cuda/count.cu b/testing/cuda/count.cu index e2b9b5f5a..8bb87a4ff 100644 --- a/testing/cuda/count.cu +++ b/testing/cuda/count.cu @@ -57,7 +57,10 @@ void count_if_kernel(ExecutionPolicy exec, Iterator first, Iterator last, Predic template struct greater_than_five { - __host__ __device__ bool operator()(const T &x) const {return x > 5;} + THRUST_HOST_DEVICE bool operator()(const T& x) const + { + return x > 5; + } }; diff --git a/testing/cuda/device_side_universal_vector.cu b/testing/cuda/device_side_universal_vector.cu index a6e061773..4f48c9614 100644 --- a/testing/cuda/device_side_universal_vector.cu +++ b/testing/cuda/device_side_universal_vector.cu @@ -3,7 +3,7 @@ #include template -__host__ __device__ void universal_vector_access(VecT &in, thrust::universal_vector &out) +THRUST_HOST_DEVICE void universal_vector_access(VecT& in, thrust::universal_vector& out) { const int expected_front = 4; const int expected_back = 2; diff --git a/testing/cuda/find.cu b/testing/cuda/find.cu index fbd86f5a0..9f590c4e0 100644 --- a/testing/cuda/find.cu +++ b/testing/cuda/find.cu @@ -10,8 +10,10 @@ struct equal_to_value_pred equal_to_value_pred(T value) : value(value) {} - __host__ __device__ - bool operator()(T v) const { return v == value; } + THRUST_HOST_DEVICE bool operator()(T v) const + { + return v == value; + } }; @@ -22,8 +24,10 @@ struct not_equal_to_value_pred not_equal_to_value_pred(T value) : value(value) {} - __host__ __device__ - bool operator()(T v) const { return v != value; } + THRUST_HOST_DEVICE bool operator()(T v) const + { + return v != value; + } }; @@ -34,8 +38,10 @@ struct less_than_value_pred less_than_value_pred(T value) : value(value) {} - __host__ __device__ - bool operator()(T v) const { return v < value; } + THRUST_HOST_DEVICE bool operator()(T v) 
const + { + return v < value; + } }; diff --git a/testing/cuda/for_each.cu b/testing/cuda/for_each.cu index afd54c621..b656cd214 100644 --- a/testing/cuda/for_each.cu +++ b/testing/cuda/for_each.cu @@ -5,13 +5,20 @@ static const size_t NUM_REGISTERS = 64; -template __host__ __device__ void f (int * x) { int temp = *x; f(x + 1); *x = temp;}; -template <> __host__ __device__ void f<0>(int * /*x*/) { } +template +THRUST_HOST_DEVICE void f(int* x) +{ + int temp = *x; + f(x + 1); + *x = temp; +}; +template <> +THRUST_HOST_DEVICE void f<0>(int* /*x*/) +{} template struct CopyFunctorWithManyRegisters { - __host__ __device__ - void operator()(int * ptr) + THRUST_HOST_DEVICE void operator()(int* ptr) { f(ptr); } @@ -53,9 +60,11 @@ DECLARE_UNITTEST(TestForEachNLargeRegisterFootprint); template struct mark_present_for_each { - T * ptr; - __host__ __device__ void - operator()(T x){ ptr[(int) x] = 1; } + T* ptr; + THRUST_HOST_DEVICE void operator()(T x) + { + ptr[(int) x] = 1; + } }; diff --git a/testing/cuda/gather.cu b/testing/cuda/gather.cu index 6af4d4727..a9805b5af 100644 --- a/testing/cuda/gather.cu +++ b/testing/cuda/gather.cu @@ -99,9 +99,8 @@ void gather_if_kernel(ExecutionPolicy exec, Iterator1 map_first, Iterator1 map_l template struct is_even_gather_if { - __host__ __device__ - bool operator()(const T i) const - { + THRUST_HOST_DEVICE bool operator()(const T i) const + { return (i % 2) == 0; } }; diff --git a/testing/cuda/generate.cu b/testing/cuda/generate.cu index 407da920c..4c806eb59 100644 --- a/testing/cuda/generate.cu +++ b/testing/cuda/generate.cu @@ -7,12 +7,16 @@ template struct return_value { T val; - - return_value(void){} - return_value(T v):val(v){} - - __host__ __device__ - T operator()(void){ return val; } + + return_value() {} + return_value(T v) + : val(v) + {} + + THRUST_HOST_DEVICE T operator()(void) + { + return val; + } }; diff --git a/testing/cuda/is_partitioned.cu b/testing/cuda/is_partitioned.cu index 468e17746..b7da55175 100644 --- 
a/testing/cuda/is_partitioned.cu +++ b/testing/cuda/is_partitioned.cu @@ -16,7 +16,7 @@ void is_partitioned_kernel(ExecutionPolicy exec, Iterator first, Iterator last, template struct is_even { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) const { return ((int) x % 2) == 0; } }; diff --git a/testing/cuda/merge_sort.cu b/testing/cuda/merge_sort.cu index 7a4c2aa2e..cbbb8b77a 100644 --- a/testing/cuda/merge_sort.cu +++ b/testing/cuda/merge_sort.cu @@ -7,7 +7,10 @@ template struct less_div_10 { - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return ((int) lhs) / 10 < ((int) rhs) / 10;} + THRUST_HOST_DEVICE bool operator()(const T& lhs, const T& rhs) const + { + return ((int) lhs) / 10 < ((int) rhs) / 10; + } }; diff --git a/testing/cuda/pair_sort.cu b/testing/cuda/pair_sort.cu index da23e4cb2..e157aa81d 100644 --- a/testing/cuda/pair_sort.cu +++ b/testing/cuda/pair_sort.cu @@ -15,9 +15,8 @@ void stable_sort_kernel(ExecutionPolicy exec, Iterator first, Iterator last) struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() diff --git a/testing/cuda/pair_sort_by_key.cu b/testing/cuda/pair_sort_by_key.cu index fa229b8a6..70207243b 100644 --- a/testing/cuda/pair_sort_by_key.cu +++ b/testing/cuda/pair_sort_by_key.cu @@ -17,9 +17,8 @@ void stable_sort_by_key_kernel(ExecutionPolicy exec, Iterator1 keys_first, Itera struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() diff --git a/testing/cuda/partition.cu b/testing/cuda/partition.cu index 4069682e5..7d65c02b1 100644 --- a/testing/cuda/partition.cu +++ b/testing/cuda/partition.cu @@ -9,8 +9,10 @@ 
template struct is_even { - __host__ __device__ - bool operator()(T x) const { return ((int) x % 2) == 0; } + THRUST_HOST_DEVICE bool operator()(T x) const + { + return ((int) x % 2) == 0; + } }; @@ -19,7 +21,7 @@ struct mod_n { T mod; bool negate; - __host__ __device__ bool operator()(T x) + THRUST_HOST_DEVICE bool operator()(T x) { return (x % mod == 0) ? (!negate) : negate; } @@ -29,7 +31,7 @@ template struct multiply_n { T multiplier; - __host__ __device__ T operator()(T x) + THRUST_HOST_DEVICE T operator()(T x) { return x * multiplier; } diff --git a/testing/cuda/partition_point.cu b/testing/cuda/partition_point.cu index 57e4344ee..72fe69e8f 100644 --- a/testing/cuda/partition_point.cu +++ b/testing/cuda/partition_point.cu @@ -16,8 +16,10 @@ void partition_point_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 las template struct is_even { - __host__ __device__ - bool operator()(T x) const { return ((int) x % 2) == 0; } + THRUST_HOST_DEVICE bool operator()(T x) const + { + return ((int) x % 2) == 0; + } }; diff --git a/testing/cuda/reduce_by_key.cu b/testing/cuda/reduce_by_key.cu index 20f44fb42..3c0db4e75 100644 --- a/testing/cuda/reduce_by_key.cu +++ b/testing/cuda/reduce_by_key.cu @@ -55,8 +55,10 @@ void reduce_by_key_kernel(ExecutionPolicy exec, template struct is_equal_div_10_reduce { - __host__ __device__ - bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const + { + return ((int) x / 10) == ((int) y / 10); + } }; @@ -302,12 +304,11 @@ class div_op : public thrust::unary_function std::int64_t m_divisor; public: - __host__ div_op(std::int64_t divisor) - : m_divisor(divisor) + THRUST_HOST div_op(std::int64_t divisor) + : m_divisor(divisor) {} - __host__ __device__ - std::int64_t operator()(std::int64_t x) const + THRUST_HOST_DEVICE std::int64_t operator()(std::int64_t x) const { return x / m_divisor; } @@ -319,12 +320,11 @@ class mod_op : public 
thrust::unary_function std::int64_t m_divisor; public: - __host__ mod_op(std::int64_t divisor) - : m_divisor(divisor) + THRUST_HOST mod_op(std::int64_t divisor) + : m_divisor(divisor) {} - __host__ __device__ - std::int64_t operator()(std::int64_t x) const + THRUST_HOST_DEVICE std::int64_t operator()(std::int64_t x) const { // div: 2 // idx: 0 1 2 3 4 5 diff --git a/testing/cuda/remove.cu b/testing/cuda/remove.cu index 0331c24b8..fd0daa95f 100644 --- a/testing/cuda/remove.cu +++ b/testing/cuda/remove.cu @@ -57,8 +57,10 @@ template struct is_even : thrust::unary_function { - __host__ __device__ - bool operator()(T x) { return (static_cast(x) & 1) == 0; } + THRUST_HOST_DEVICE bool operator()(T x) + { + return (static_cast(x) & 1) == 0; + } }; @@ -66,8 +68,10 @@ template struct is_true : thrust::unary_function { - __host__ __device__ - bool operator()(T x) { return x ? true : false; } + THRUST_HOST_DEVICE bool operator()(T x) + { + return x ? true : false; + } }; diff --git a/testing/cuda/replace.cu b/testing/cuda/replace.cu index bb8b7faa9..e50b689c2 100644 --- a/testing/cuda/replace.cu +++ b/testing/cuda/replace.cu @@ -6,7 +6,10 @@ template struct less_than_five { - __host__ __device__ bool operator()(const T &val) const {return val < 5;} + THRUST_HOST_DEVICE bool operator()(const T& val) const + { + return val < 5; + } }; diff --git a/testing/cuda/scan.cu b/testing/cuda/scan.cu index 5a19798cd..d3c98d3f6 100644 --- a/testing/cuda/scan.cu +++ b/testing/cuda/scan.cu @@ -221,11 +221,10 @@ struct const_ref_plus_mod3 const_ref_plus_mod3(T * table) : table(table) {} - __host__ __device__ - const T& operator()(T a, T b) - { - return table[(int) (a + b)]; - } + THRUST_HOST_DEVICE const T& operator()(T a, T b) + { + return table[(int) (a + b)]; + } }; static void TestInclusiveScanWithConstAccumulator(void) diff --git a/testing/cuda/scatter.cu b/testing/cuda/scatter.cu index 92e7f342a..e07d11a82 100644 --- a/testing/cuda/scatter.cu +++ b/testing/cuda/scatter.cu @@ -66,7 
+66,10 @@ void scatter_if_kernel(ExecutionPolicy exec, Iterator1 first, Iterator1 last, It template struct is_even_scatter_if { - __host__ __device__ bool operator()(const T i) const { return (i % 2) == 0; } + THRUST_HOST_DEVICE bool operator()(const T i) const + { + return (i % 2) == 0; + } }; diff --git a/testing/cuda/sort.cu b/testing/cuda/sort.cu index c3d5ff2bc..96f6bb910 100644 --- a/testing/cuda/sort.cu +++ b/testing/cuda/sort.cu @@ -7,8 +7,7 @@ template struct my_less { - __host__ __device__ - bool operator()(const T& lhs, const T& rhs) const + THRUST_HOST_DEVICE bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; } diff --git a/testing/cuda/sort_by_key.cu b/testing/cuda/sort_by_key.cu index ee2b44ea0..8a05fb87d 100644 --- a/testing/cuda/sort_by_key.cu +++ b/testing/cuda/sort_by_key.cu @@ -7,7 +7,7 @@ template struct my_less { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T& lhs, const T& rhs) const { return lhs < rhs; diff --git a/testing/cuda/unique.cu b/testing/cuda/unique.cu index 136ba76fd..2150d500f 100644 --- a/testing/cuda/unique.cu +++ b/testing/cuda/unique.cu @@ -6,8 +6,10 @@ template struct is_equal_div_10_unique { - __host__ __device__ - bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const + { + return ((int) x / 10) == ((int) y / 10); + } }; diff --git a/testing/cuda/unique_by_key.cu b/testing/cuda/unique_by_key.cu index d96cbdc6c..254f73a11 100644 --- a/testing/cuda/unique_by_key.cu +++ b/testing/cuda/unique_by_key.cu @@ -7,8 +7,10 @@ template struct is_equal_div_10_unique { - __host__ __device__ - bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const + { + return ((int) x / 10) == ((int) y / 10); + } }; diff --git a/testing/device_delete.cu b/testing/device_delete.cu index 48d7a8acc..9e82248b5 100644 --- 
a/testing/device_delete.cu +++ b/testing/device_delete.cu @@ -8,12 +8,12 @@ struct Foo { - __host__ __device__ + THRUST_HOST_DEVICE Foo(void) : set_me_upon_destruction{nullptr} {} - __host__ __device__ + THRUST_HOST_DEVICE ~Foo(void) { NV_IF_TARGET(NV_IS_DEVICE, ( diff --git a/testing/docs/doxybook_test.h b/testing/docs/doxybook_test.h index 244648ee1..8239a553d 100644 --- a/testing/docs/doxybook_test.h +++ b/testing/docs/doxybook_test.h @@ -75,20 +75,17 @@ class test_class /*! \brief Construct a test class. */ - __host__ __device__ constexpr - test_class(int); + THRUST_HOST_DEVICE constexpr test_class(int); /*! \brief \c test_member_function is a function intended to exercise * and test Doxybook rendering. */ - __host__ __device__ constexpr - int test_member_function() = 0; + THRUST_HOST_DEVICE constexpr int test_member_function() = 0; /*! \brief \c test_virtual_member_function is a function intended to exercise * and test Doxybook rendering. */ - __host__ __device__ - virtual int test_virtual_member_function() = 0; + THRUST_HOST_DEVICE virtual int test_virtual_member_function() = 0; /*! \brief \c test_parameter_overflow_member_function is a function intended * to test Doxybook's rendering of function and template parameters that exceed @@ -119,8 +116,7 @@ class test_class /*! \brief \c test_protected_member_function is a function intended to * exercise and test Doxybook rendering. */ - __device__ - auto test_protected_member_function(); + _CCCL_DEVICE auto test_protected_member_function(); }; /*! \brief \c test_derived_class is a derived class intended to exercise and @@ -138,8 +134,7 @@ class test_derived_class : test_class /*! \brief \c test_derived_member_function is a function intended to exercise * and test Doxybook rendering. */ - __host__ __device__ constexpr - double test_derived_member_function(int, int); + THRUST_HOST_DEVICE constexpr double test_derived_member_function(int, int); }; /*! 
\brief \c test_function is a function intended to exercise and test Doxybook diff --git a/testing/equal.cu b/testing/equal.cu index 5ce599c54..2fb05e6e8 100644 --- a/testing/equal.cu +++ b/testing/equal.cu @@ -126,7 +126,7 @@ struct only_set_when_both_expected long long expected; bool * flag; - __device__ + THRUST_DEVICE bool operator()(long long x, long long y) { if (x == expected && y == expected) diff --git a/testing/event.cu b/testing/event.cu index 581426919..4f4913d32 100644 --- a/testing/event.cu +++ b/testing/event.cu @@ -9,8 +9,7 @@ /////////////////////////////////////////////////////////////////////////////// -__host__ -void test_event_default_constructed() +THRUST_HOST void test_event_default_constructed() { THRUST_STATIC_ASSERT( (std::is_same< @@ -53,8 +52,7 @@ DECLARE_UNITTEST(test_event_default_constructed); /////////////////////////////////////////////////////////////////////////////// -__host__ -void test_event_new_stream() +THRUST_HOST void test_event_new_stream() { auto e0 = thrust::device_event(thrust::new_stream); @@ -70,8 +68,7 @@ DECLARE_UNITTEST(test_event_new_stream); /////////////////////////////////////////////////////////////////////////////// -__host__ -void test_event_linear_chaining() +THRUST_HOST void test_event_linear_chaining() { constexpr std::int64_t n = 1024; @@ -111,8 +108,7 @@ DECLARE_UNITTEST(test_event_linear_chaining); /////////////////////////////////////////////////////////////////////////////// -__host__ -void test_event_when_all() +THRUST_HOST void test_event_when_all() { // Create events with new streams. auto e0 = thrust::when_all(); diff --git a/testing/fill.cu b/testing/fill.cu index dcb5f0a32..16e2767b7 100644 --- a/testing/fill.cu +++ b/testing/fill.cu @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -348,14 +348,14 @@ struct TypeWithNonTrivialAssigment { int x, y, z; - __host__ __device__ + THRUST_HOST_DEVICE TypeWithNonTrivialAssigment() : x(0), y(0), z(0) {} #if THRUST_CPP_DIALECT >= 2011 TypeWithNonTrivialAssigment(const TypeWithNonTrivialAssigment &) = default; #endif - __host__ __device__ + THRUST_HOST_DEVICE TypeWithNonTrivialAssigment& operator=(const TypeWithNonTrivialAssigment& t) { x = t.x; @@ -364,7 +364,7 @@ struct TypeWithNonTrivialAssigment return *this; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const TypeWithNonTrivialAssigment& t) const { return x == t.x && y == t.y && z == t.z; diff --git a/testing/find.cu b/testing/find.cu index 988afbeef..23b3d83a6 100644 --- a/testing/find.cu +++ b/testing/find.cu @@ -11,7 +11,7 @@ struct equal_to_value_pred equal_to_value_pred(T value) : value(value) {} - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T v) const { return v == value; } }; @@ -22,7 +22,7 @@ struct not_equal_to_value_pred not_equal_to_value_pred(T value) : value(value) {} - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T v) const { return v != value; } }; @@ -33,7 +33,7 @@ struct less_than_value_pred less_than_value_pred(T value) : value(value) {} - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T v) const { return v < value; } }; @@ -348,11 +348,11 @@ class Weird int value; public: - __host__ __device__ Weird(int val, int) + THRUST_HOST_DEVICE Weird(int val, int) : value(val) {} - friend __host__ __device__ + friend THRUST_HOST_DEVICE bool operator==(int x, Weird y) { return x == y.value; diff --git a/testing/for_each.cu b/testing/for_each.cu index e5af8761e..391ab2c19 100644 --- a/testing/for_each.cu +++ b/testing/for_each.cu @@ -31,7 +31,7 @@ class mark_present_for_each { public: T * ptr; - __host__ __device__ void operator()(T x){ ptr[(int) x] = 1; } + 
THRUST_HOST_DEVICE void operator()(T x){ ptr[(int) x] = 1; } }; template @@ -62,7 +62,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestForEachSimple); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(my_system &system, InputIterator first, InputIterator, Function) { system.validate_dispatch(); @@ -82,7 +82,7 @@ DECLARE_UNITTEST(TestForEachDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each(my_tag, InputIterator first, InputIterator, Function) { *first = 13; @@ -130,7 +130,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestForEachNSimple); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(my_system &system, InputIterator first, Size, Function) { system.validate_dispatch(); @@ -150,7 +150,7 @@ DECLARE_UNITTEST(TestForEachNDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator for_each_n(my_tag, InputIterator first, Size, Function) { *first = 13; @@ -285,7 +285,7 @@ struct SetFixedVectorToConstant SetFixedVectorToConstant(T scalar) : exemplar(scalar) {} - __host__ __device__ + THRUST_HOST_DEVICE void operator()(FixedVector& t) { t = exemplar; @@ -381,7 +381,7 @@ struct only_set_when_expected unsigned long long expected; bool * flag; - __device__ + THRUST_DEVICE void operator()(unsigned long long x) { if (x == expected) diff --git a/testing/functional_placeholders_arithmetic.cu b/testing/functional_placeholders_arithmetic.cu index 8d8535aa6..724774796 100644 --- a/testing/functional_placeholders_arithmetic.cu +++ b/testing/functional_placeholders_arithmetic.cu @@ -64,7 +64,7 @@ DECLARE_VECTOR_UNITTEST(TestFunctionalPlaceholders##name); template struct unary_plus_reference { - __host__ __device__ T operator()(const T &x) const + THRUST_HOST_DEVICE T operator()(const T &x) const { // Static cast to undo integral promotion return static_cast(+x); } diff --git a/testing/functional_placeholders_bitwise.cu b/testing/functional_placeholders_bitwise.cu index 
7c92d967f..05d8c0442 100644 --- a/testing/functional_placeholders_bitwise.cu +++ b/testing/functional_placeholders_bitwise.cu @@ -71,7 +71,7 @@ BINARY_FUNCTIONAL_PLACEHOLDERS_TEST(BitXor, ^, thrust::bit_xor, SmallIntegralTyp template struct bit_negate_reference { - __host__ __device__ T operator()(const T &x) const + THRUST_HOST_DEVICE T operator()(const T &x) const { return ~x; } diff --git a/testing/functional_placeholders_compound_assignment.cu b/testing/functional_placeholders_compound_assignment.cu index 512fa73fa..ec1b2af72 100644 --- a/testing/functional_placeholders_compound_assignment.cu +++ b/testing/functional_placeholders_compound_assignment.cu @@ -37,31 +37,31 @@ VectorUnitTest struct plus_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs += rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs += rhs; } }; template struct minus_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs -= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs -= rhs; } }; template struct multiplies_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs *= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs *= rhs; } }; template struct divides_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs /= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs /= rhs; } }; template struct modulus_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs %= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs %= rhs; } }; BINARY_FUNCTIONAL_PLACEHOLDERS_TEST(PlusEqual, +=, plus_equal_reference, ThirtyTwoBitTypes); @@ -73,31 +73,31 @@ BINARY_FUNCTIONAL_PLACEHOLDERS_TEST(ModulusEqual, %=, modulus_equal_reference template struct 
bit_and_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs &= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs &= rhs; } }; template struct bit_or_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs |= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs |= rhs; } }; template struct bit_xor_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs ^= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs ^= rhs; } }; template struct bit_lshift_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs <<= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs <<= rhs; } }; template struct bit_rshift_equal_reference { - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs >>= rhs; } + THRUST_HOST_DEVICE T& operator()(T &lhs, const T &rhs) const { return lhs >>= rhs; } }; BINARY_FUNCTIONAL_PLACEHOLDERS_TEST(BitAndEqual, &=, bit_and_equal_reference, SmallIntegralTypes); @@ -121,25 +121,25 @@ BINARY_FUNCTIONAL_PLACEHOLDERS_TEST(BitRshiftEqual, >>=, bit_rshift_equal_refere template struct prefix_increment_reference { - __host__ __device__ T& operator()(T &x) const { return ++x; } + THRUST_HOST_DEVICE T& operator()(T &x) const { return ++x; } }; template struct suffix_increment_reference { - __host__ __device__ T operator()(T &x) const { return x++; } + THRUST_HOST_DEVICE T operator()(T &x) const { return x++; } }; template struct prefix_decrement_reference { - __host__ __device__ T& operator()(T &x) const { return --x; } + THRUST_HOST_DEVICE T& operator()(T &x) const { return --x; } }; template struct suffix_decrement_reference { - __host__ __device__ T operator()(T &x) const { return x--; } + THRUST_HOST_DEVICE T operator()(T &x) const { return x--; } }; #define 
PREFIX_FUNCTIONAL_PLACEHOLDERS_TEST(name, reference_operator, functor) \ diff --git a/testing/functional_placeholders_miscellaneous.cu b/testing/functional_placeholders_miscellaneous.cu index d6774211b..24a176080 100644 --- a/testing/functional_placeholders_miscellaneous.cu +++ b/testing/functional_placeholders_miscellaneous.cu @@ -5,11 +5,11 @@ template struct saxpy_reference { - __host__ __device__ saxpy_reference(const T &aa) - : a(aa) + THRUST_HOST_DEVICE saxpy_reference(const T& aa) + : a(aa) {} - __host__ __device__ T operator()(const T &x, const T &y) const + THRUST_HOST_DEVICE T operator()(const T& x, const T& y) const { return a * x + y; } diff --git a/testing/future.cu b/testing/future.cu index eb1ab582a..da2b73832 100644 --- a/testing/future.cu +++ b/testing/future.cu @@ -33,8 +33,7 @@ using future_value_types = unittest::type_list< template struct test_future_default_constructed { - __host__ - void operator()() + THRUST_HOST void operator()() { THRUST_STATIC_ASSERT( (std::is_same< @@ -97,8 +96,7 @@ DECLARE_GENERIC_UNITTEST_WITH_TYPES( template struct test_future_new_stream { - __host__ - void operator()() + THRUST_HOST void operator()() { auto f0 = thrust::device_future(thrust::new_stream); @@ -134,8 +132,7 @@ DECLARE_GENERIC_UNITTEST_WITH_TYPES( template struct test_future_convert_to_event { - __host__ - void operator()() + THRUST_HOST void operator()() { auto f0 = thrust::device_future(thrust::new_stream); @@ -164,8 +161,7 @@ DECLARE_GENERIC_UNITTEST_WITH_TYPES( template struct test_future_when_all { - __host__ - void operator()() + THRUST_HOST void operator()() { // Create futures with new streams. 
auto f0 = thrust::device_future(thrust::new_stream); diff --git a/testing/gather.cu b/testing/gather.cu index cfd33c0a2..e7eca84f1 100644 --- a/testing/gather.cu +++ b/testing/gather.cu @@ -179,7 +179,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestGatherIfSimple); template struct is_even_gather_if { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T i) const { return (i % 2) == 0; diff --git a/testing/generate.cu b/testing/generate.cu index 1dc5d30b3..e119f9767 100644 --- a/testing/generate.cu +++ b/testing/generate.cu @@ -30,7 +30,7 @@ struct return_value return_value(void){} return_value(T v):val(v){} - __host__ __device__ + THRUST_HOST_DEVICE T operator()(void){ return val; } }; @@ -57,7 +57,7 @@ DECLARE_VECTOR_UNITTEST(TestGenerateSimple); template -__host__ __device__ +THRUST_HOST_DEVICE void generate(my_system &system, ForwardIterator /*first*/, ForwardIterator, Generator) { system.validate_dispatch(); @@ -76,7 +76,7 @@ DECLARE_UNITTEST(TestGenerateDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void generate(my_tag, ForwardIterator first, ForwardIterator, Generator) { *first = 13; @@ -149,7 +149,7 @@ DECLARE_VECTOR_UNITTEST(TestGenerateNSimple); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator generate_n(my_system &system, ForwardIterator first, Size, Generator) { system.validate_dispatch(); @@ -169,7 +169,7 @@ DECLARE_UNITTEST(TestGenerateNDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator generate_n(my_tag, ForwardIterator first, Size, Generator) { *first = 13; diff --git a/testing/generate_const_iterators.cu b/testing/generate_const_iterators.cu index fd12bfb3b..2291b2ae7 100644 --- a/testing/generate_const_iterators.cu +++ b/testing/generate_const_iterators.cu @@ -4,7 +4,7 @@ struct generator { - __host__ __device__ + THRUST_HOST_DEVICE int operator()() const { return 1; diff --git a/testing/inner_product.cu b/testing/inner_product.cu index 4fae72e88..5887c8309 100644 --- 
a/testing/inner_product.cu +++ b/testing/inner_product.cu @@ -110,7 +110,7 @@ struct only_set_when_both_expected long long expected; bool * flag; - __device__ + THRUST_DEVICE long long operator()(long long x, long long y) { if (x == expected && y == expected) diff --git a/testing/is_contiguous_iterator.cu b/testing/is_contiguous_iterator.cu index 42a5aa663..673492fb0 100644 --- a/testing/is_contiguous_iterator.cu +++ b/testing/is_contiguous_iterator.cu @@ -43,7 +43,7 @@ THRUST_STATIC_ASSERT((!thrust::is_contiguous_iterator< >::value)); template -__host__ +THRUST_HOST void test_is_contiguous_iterator() { THRUST_STATIC_ASSERT((thrust::is_contiguous_iterator< @@ -125,7 +125,7 @@ void test_is_contiguous_iterator() DECLARE_GENERIC_UNITTEST(test_is_contiguous_iterator); template -__host__ +THRUST_HOST void test_is_contiguous_iterator_vectors() { THRUST_STATIC_ASSERT((thrust::is_contiguous_iterator< diff --git a/testing/is_operator_function_object.cu b/testing/is_operator_function_object.cu index 935ee1e55..52a90e075 100644 --- a/testing/is_operator_function_object.cu +++ b/testing/is_operator_function_object.cu @@ -26,8 +26,7 @@ THRUST_STATIC_ASSERT((thrust::is_operator_plus_function_object< #endif template -__host__ -void test_is_operator_less_function_object() +THRUST_HOST void test_is_operator_less_function_object() { THRUST_STATIC_ASSERT((thrust::is_operator_less_function_object< thrust::less @@ -68,8 +67,7 @@ void test_is_operator_less_function_object() DECLARE_GENERIC_UNITTEST(test_is_operator_less_function_object); template -__host__ -void test_is_operator_greater_function_object() +THRUST_HOST void test_is_operator_greater_function_object() { THRUST_STATIC_ASSERT((!thrust::is_operator_greater_function_object< thrust::less @@ -110,8 +108,7 @@ void test_is_operator_greater_function_object() DECLARE_GENERIC_UNITTEST(test_is_operator_greater_function_object); template -__host__ -void test_is_operator_less_or_greater_function_object() +THRUST_HOST void 
test_is_operator_less_or_greater_function_object() { THRUST_STATIC_ASSERT((thrust::is_operator_less_or_greater_function_object< thrust::less @@ -152,8 +149,7 @@ void test_is_operator_less_or_greater_function_object() DECLARE_GENERIC_UNITTEST(test_is_operator_less_or_greater_function_object); template -__host__ -void test_is_operator_plus_function_object() +THRUST_HOST void test_is_operator_plus_function_object() { THRUST_STATIC_ASSERT((thrust::is_operator_plus_function_object< thrust::plus diff --git a/testing/is_partitioned.cu b/testing/is_partitioned.cu index dab7f4e98..eb25ee9ff 100644 --- a/testing/is_partitioned.cu +++ b/testing/is_partitioned.cu @@ -23,8 +23,10 @@ template struct is_even { - __host__ __device__ - bool operator()(T x) const { return ((int) x % 2) == 0; } + THRUST_HOST_DEVICE bool operator()(T x) const + { + return ((int) x % 2) == 0; + } }; template @@ -79,7 +81,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestIsPartitioned); template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(my_system &system, InputIterator /*first*/, InputIterator, Predicate) { system.validate_dispatch(); @@ -99,7 +101,7 @@ DECLARE_UNITTEST(TestIsPartitionedDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE bool is_partitioned(my_tag, InputIterator first, InputIterator, Predicate) { *first = 13; diff --git a/testing/is_sorted_until.cu b/testing/is_sorted_until.cu index a561d5eb5..cff7fd3a2 100644 --- a/testing/is_sorted_until.cu +++ b/testing/is_sorted_until.cu @@ -115,7 +115,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestIsSortedUntil); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(my_system &system, ForwardIterator first, ForwardIterator) { system.validate_dispatch(); @@ -135,7 +135,7 @@ DECLARE_UNITTEST(TestIsSortedUntilExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator is_sorted_until(my_tag, ForwardIterator first, ForwardIterator) { *first = 13; diff --git a/testing/logical.cu 
b/testing/logical.cu index 8c532eac5..76d6e5f62 100644 --- a/testing/logical.cu +++ b/testing/logical.cu @@ -43,7 +43,7 @@ DECLARE_VECTOR_UNITTEST(TestAllOf); template -__host__ __device__ +THRUST_HOST_DEVICE bool all_of(my_system &system, InputIterator, InputIterator, Predicate) { system.validate_dispatch(); @@ -63,7 +63,7 @@ DECLARE_UNITTEST(TestAllOfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE bool all_of(my_tag, InputIterator first, InputIterator, Predicate) { *first = 13; @@ -105,7 +105,7 @@ DECLARE_VECTOR_UNITTEST(TestAnyOf); template -__host__ __device__ +THRUST_HOST_DEVICE bool any_of(my_system &system, InputIterator, InputIterator, Predicate) { system.validate_dispatch(); @@ -125,7 +125,7 @@ DECLARE_UNITTEST(TestAnyOfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE bool any_of(my_tag, InputIterator first, InputIterator, Predicate) { *first = 13; @@ -167,7 +167,7 @@ DECLARE_VECTOR_UNITTEST(TestNoneOf); template -__host__ __device__ +THRUST_HOST_DEVICE bool none_of(my_system &system, InputIterator, InputIterator, Predicate) { system.validate_dispatch(); @@ -187,7 +187,7 @@ DECLARE_UNITTEST(TestNoneOfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE bool none_of(my_tag, InputIterator first, InputIterator, Predicate) { *first = 13; diff --git a/testing/mr_disjoint_pool.cu b/testing/mr_disjoint_pool.cu index 69a6005ec..515cae452 100644 --- a/testing/mr_disjoint_pool.cu +++ b/testing/mr_disjoint_pool.cu @@ -15,7 +15,7 @@ struct alloc_id std::size_t alignment; std::size_t offset; - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const alloc_id & other) const { return id == other.id && size == other.size && alignment == other.alignment; diff --git a/testing/mr_pool.cu b/testing/mr_pool.cu index d4f8a3056..16871d03a 100644 --- a/testing/mr_pool.cu +++ b/testing/mr_pool.cu @@ -40,12 +40,12 @@ struct tracked_pointer : thrust::iterator_facade< std::size_t offset; void * ptr; - __host__ __device__ 
+ THRUST_HOST_DEVICE explicit tracked_pointer(T * ptr = NULL) : id(), size(), alignment(), offset(), ptr(ptr) { } - __host__ __device__ + THRUST_HOST_DEVICE ~tracked_pointer() { } @@ -62,13 +62,13 @@ struct tracked_pointer : thrust::iterator_facade< return ret; } - __host__ __device__ + THRUST_HOST_DEVICE std::ptrdiff_t distance_to(const tracked_pointer & other) const { return static_cast(other.ptr) - static_cast(ptr); } - __host__ __device__ + THRUST_HOST_DEVICE T * get() const { return static_cast(ptr); @@ -76,32 +76,32 @@ struct tracked_pointer : thrust::iterator_facade< // globally qualified, because MSVC somehow prefers the name from the dependent base // of this class over the `reference` template that's visible in the global namespace of this file... - __host__ __device__ + THRUST_HOST_DEVICE typename ::reference::type dereference() const { return *get(); } - __host__ __device__ + THRUST_HOST_DEVICE void increment() { advance(1); } - __host__ __device__ + THRUST_HOST_DEVICE void decrement() { advance(-1); } - __host__ __device__ + THRUST_HOST_DEVICE void advance(std::ptrdiff_t diff) { ptr = get() + diff; offset += diff * sizeof(T); } - __host__ __device__ + THRUST_HOST_DEVICE bool equal(const tracked_pointer & other) const { return id == other.id && size == other.size && alignment == other.alignment && offset == other.offset && ptr == other.ptr; diff --git a/testing/out_of_memory_recovery.cu b/testing/out_of_memory_recovery.cu index 5e4f0c327..5972ee22a 100644 --- a/testing/out_of_memory_recovery.cu +++ b/testing/out_of_memory_recovery.cu @@ -16,8 +16,8 @@ struct non_trivial { - __host__ __device__ non_trivial() {} - __host__ __device__ ~non_trivial() {} + THRUST_HOST_DEVICE non_trivial() {} + THRUST_HOST_DEVICE ~non_trivial() {} }; void test_out_of_memory_recovery() diff --git a/testing/pair_reduce.cu b/testing/pair_reduce.cu index 6682fb3cc..3c2371718 100644 --- a/testing/pair_reduce.cu +++ b/testing/pair_reduce.cu @@ -5,9 +5,8 @@ struct make_pair_functor 
{ - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() @@ -17,8 +16,7 @@ struct make_pair_functor struct add_pairs { template - __host__ __device__ - Pair1 operator()(const Pair1 &x, const Pair2 &y) + THRUST_HOST_DEVICE Pair1 operator()(const Pair1& x, const Pair2& y) { // Need cast to undo integer promotion, decltype(char{} + char{}) == int using P1T1 = typename Pair1::first_type; diff --git a/testing/pair_scan.cu b/testing/pair_scan.cu index 5554c6dc4..6f70ae1c8 100644 --- a/testing/pair_scan.cu +++ b/testing/pair_scan.cu @@ -9,9 +9,8 @@ struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() @@ -21,8 +20,7 @@ struct make_pair_functor struct add_pairs { template - __host__ __device__ - Pair1 operator()(const Pair1 &x, const Pair2 &y) + THRUST_HOST_DEVICE Pair1 operator()(const Pair1& x, const Pair2& y) { return thrust::make_pair(x.first + y.first, x.second + y.second); } // end operator() diff --git a/testing/pair_scan_by_key.cu b/testing/pair_scan_by_key.cu index 21b53bcbe..a02f94b3f 100644 --- a/testing/pair_scan_by_key.cu +++ b/testing/pair_scan_by_key.cu @@ -5,9 +5,8 @@ struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() @@ -17,8 +16,7 @@ struct make_pair_functor struct add_pairs { template - __host__ __device__ - Pair1 operator()(const Pair1 &x, const Pair2 &y) + THRUST_HOST_DEVICE Pair1 operator()(const Pair1& x, const Pair2& y) { // Need cast to undo integer promotion, decltype(char{} + char{}) == int using 
P1T1 = typename Pair1::first_type; diff --git a/testing/pair_sort.cu b/testing/pair_sort.cu index db8a83739..92cebd536 100644 --- a/testing/pair_sort.cu +++ b/testing/pair_sort.cu @@ -5,9 +5,8 @@ struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() diff --git a/testing/pair_sort_by_key.cu b/testing/pair_sort_by_key.cu index 811368aed..8d01e96df 100644 --- a/testing/pair_sort_by_key.cu +++ b/testing/pair_sort_by_key.cu @@ -4,9 +4,8 @@ struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() diff --git a/testing/pair_transform.cu b/testing/pair_transform.cu index 612a77af0..33610718e 100644 --- a/testing/pair_transform.cu +++ b/testing/pair_transform.cu @@ -6,9 +6,8 @@ struct make_pair_functor { - template - __host__ __device__ - thrust::pair operator()(const T1 &x, const T2 &y) + template + THRUST_HOST_DEVICE thrust::pair operator()(const T1& x, const T2& y) { return thrust::make_pair(x,y); } // end operator()() @@ -17,8 +16,7 @@ struct make_pair_functor struct add_pairs { template - __host__ __device__ - Pair1 operator()(const Pair1 &x, const Pair2 &y) + THRUST_HOST_DEVICE Pair1 operator()(const Pair1& x, const Pair2& y) { return thrust::make_pair(x.first + y.first, x.second + y.second); } // end operator() diff --git a/testing/partition.cu b/testing/partition.cu index fe450a1a9..b3a6bfefb 100644 --- a/testing/partition.cu +++ b/testing/partition.cu @@ -32,7 +32,7 @@ template struct is_even { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) const { return ((int) x % 2) == 0; } }; @@ -1034,7 +1034,7 @@ VariableUnitTest - __host__ __device__ + THRUST_HOST_DEVICE bool 
operator()(const Tuple& t) const { return thrust::get<0>(t) <= thrust::get<1>(t); @@ -1200,7 +1200,7 @@ DECLARE_VECTOR_UNITTEST(TestStablePartitionStencilZipIterator); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(my_system &system, ForwardIterator first, ForwardIterator, @@ -1228,7 +1228,7 @@ DECLARE_UNITTEST(TestPartitionDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(my_system &system, ForwardIterator first, ForwardIterator, @@ -1257,7 +1257,7 @@ DECLARE_UNITTEST(TestPartitionStencilDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(my_tag, ForwardIterator first, ForwardIterator, @@ -1283,7 +1283,7 @@ DECLARE_UNITTEST(TestPartitionDispatchImplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition(my_tag, ForwardIterator first, ForwardIterator, @@ -1311,7 +1311,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair partition_copy(my_system &system, InputIterator, @@ -1346,7 +1346,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair partition_copy(my_system &system, InputIterator1, @@ -1382,7 +1382,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair partition_copy(my_tag, InputIterator first, @@ -1415,7 +1415,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair partition_copy(my_tag, InputIterator1 first, @@ -1447,7 +1447,7 @@ DECLARE_UNITTEST(TestPartitionCopyStencilDispatchImplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(my_system &system, ForwardIterator first, ForwardIterator, @@ -1475,7 +1475,7 @@ DECLARE_UNITTEST(TestStablePartitionDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(my_system &system, ForwardIterator first, ForwardIterator, @@ -1504,7 +1504,7 @@ DECLARE_UNITTEST(TestStablePartitionStencilDispatchExplicit); template -__host__ __device__ 
+THRUST_HOST_DEVICE ForwardIterator stable_partition(my_tag, ForwardIterator first, ForwardIterator, @@ -1530,7 +1530,7 @@ DECLARE_UNITTEST(TestStablePartitionDispatchImplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator stable_partition(my_tag, ForwardIterator first, ForwardIterator, @@ -1559,7 +1559,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair stable_partition_copy(my_system &system, InputIterator, @@ -1594,7 +1594,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair stable_partition_copy(my_system &system, InputIterator1, @@ -1630,7 +1630,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair stable_partition_copy(my_tag, InputIterator first, @@ -1663,7 +1663,7 @@ template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair stable_partition_copy(my_tag, InputIterator1 first, diff --git a/testing/partition_point.cu b/testing/partition_point.cu index 05636c08d..499cccc6c 100644 --- a/testing/partition_point.cu +++ b/testing/partition_point.cu @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ template struct is_even { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) const { return ((int) x % 2) == 0; } }; @@ -66,7 +66,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestPartitionPoint); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(my_system &system, ForwardIterator first, ForwardIterator, @@ -92,7 +92,7 @@ DECLARE_UNITTEST(TestPartitionPointDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator partition_point(my_tag, ForwardIterator first, ForwardIterator, @@ -118,7 +118,7 @@ struct test_less_than { long long expected; - __device__ + THRUST_DEVICE bool operator()(long long y) { return y < expected; diff --git a/testing/random.cu b/testing/random.cu index 43298bd46..c85360aec 100644 --- a/testing/random.cu +++ b/testing/random.cu @@ -6,13 +6,11 @@ template struct ValidateEngine { - __host__ __device__ - ValidateEngine(const typename Engine::result_type value_10000) - : m_value_10000(value_10000) + THRUST_HOST_DEVICE ValidateEngine(const typename Engine::result_type value_10000) + : m_value_10000(value_10000) {} - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { Engine e; e.discard(9999); @@ -29,8 +27,7 @@ template struct ValidateEngineMin { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { Engine e; @@ -48,8 +45,7 @@ template struct ValidateEngineMin { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { return true; } @@ -59,8 +55,7 @@ template template struct ValidateEngineMax { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { Engine e; @@ -79,8 +74,7 @@ template template struct ValidateEngineEqual { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { bool result = true; @@ -113,8 +107,7 @@ template template 
struct ValidateEngineUnequal { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { bool result = true; @@ -155,13 +148,11 @@ template { typedef Engine random_engine; - __host__ __device__ - ValidateDistributionMin(const Distribution &dd) - : d(dd) + THRUST_HOST_DEVICE ValidateDistributionMin(const Distribution& dd) + : d(dd) {} - __host__ __device__ - bool operator()(void) + THRUST_HOST_DEVICE bool operator()(void) { Engine e; @@ -184,13 +175,11 @@ template { typedef Engine random_engine; - __host__ __device__ - ValidateDistributionMax(const Distribution &dd) - : d(dd) + THRUST_HOST_DEVICE ValidateDistributionMax(const Distribution& dd) + : d(dd) {} - __host__ __device__ - bool operator()(void) + THRUST_HOST_DEVICE bool operator()(void) { Engine e; @@ -211,8 +200,7 @@ template template struct ValidateDistributionEqual { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { return d0 == d1; } @@ -224,8 +212,7 @@ template template struct ValidateDistributionUnqual { - __host__ __device__ - bool operator()(void) const + THRUST_HOST_DEVICE bool operator()(void) const { return d0 != d1; } diff --git a/testing/reduce.cu b/testing/reduce.cu index cb08bc889..b4f4f5365 100644 --- a/testing/reduce.cu +++ b/testing/reduce.cu @@ -8,7 +8,7 @@ template struct plus_mod_10 { - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T lhs, T rhs) const { return ((lhs % 10) + (rhs % 10)) % 10; @@ -18,7 +18,7 @@ template template struct is_equal_div_10_reduce { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } }; @@ -155,7 +155,7 @@ struct plus_mod3 plus_mod3(T * table) : table(table) {} - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T a, T b) { return table[(int) (a + b)]; diff --git a/testing/reduce_by_key.cu b/testing/reduce_by_key.cu index f8539c066..f27e6ffcb 100644 --- 
a/testing/reduce_by_key.cu +++ b/testing/reduce_by_key.cu @@ -7,7 +7,7 @@ template struct is_equal_div_10_reduce { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } }; diff --git a/testing/regression/gh_919_nvbug_2318871__zip_iterator_with_complex.cu b/testing/regression/gh_919_nvbug_2318871__zip_iterator_with_complex.cu index 3904933f3..cd352379c 100644 --- a/testing/regression/gh_919_nvbug_2318871__zip_iterator_with_complex.cu +++ b/testing/regression/gh_919_nvbug_2318871__zip_iterator_with_complex.cu @@ -10,8 +10,7 @@ struct greater_than_5 { template - __host__ __device__ - bool operator()(T val) + THRUST_HOST_DEVICE bool operator()(T val) { return abs(val) > 5; } diff --git a/testing/regression/nvbug_1940974__merge_with_constant_iterator.cu b/testing/regression/nvbug_1940974__merge_with_constant_iterator.cu index 646fdc558..6b6c8b623 100644 --- a/testing/regression/nvbug_1940974__merge_with_constant_iterator.cu +++ b/testing/regression/nvbug_1940974__merge_with_constant_iterator.cu @@ -7,7 +7,7 @@ struct comp { template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const Tuple1& t1, const Tuple2& t2) { return thrust::get<0>(t1) == thrust::get<1>(t2); diff --git a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.cu b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.cu index f06945328..1c276571e 100644 --- a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.cu +++ b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.cu @@ -1,7 +1,7 @@ #include #include -inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) +inline THRUST_HOST_DEVICE uint2 operator+(uint2 a, uint2 b) { return make_uint2(a.x + b.x, a.y + b.y); } diff --git a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed0.cu 
b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed0.cu index f987c2f3f..758bb0080 100644 --- a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed0.cu +++ b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed0.cu @@ -3,7 +3,7 @@ struct uint2_adder { - __host__ __device__ uint2 operator()(uint2 a, uint2 b) { + THRUST_HOST_DEVICE uint2 operator()(uint2 a, uint2 b) { return make_uint2(a.x + b.x, a.y + b.y); } }; diff --git a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed1.cu b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed1.cu index 4ccf67d39..5a71a9893 100644 --- a/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed1.cu +++ b/testing/regression/nvbug_1990211__scan_requires_assignability_from_zero.fixed1.cu @@ -1,7 +1,7 @@ #include #include -inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) +inline THRUST_HOST_DEVICE uint2 operator+(uint2 a, uint2 b) { return make_uint2(a.x + b.x, a.y + b.y); } diff --git a/testing/remove.cu b/testing/remove.cu index 97b057014..e9e793dd4 100644 --- a/testing/remove.cu +++ b/testing/remove.cu @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,7 @@ template struct is_even : thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) { return (static_cast(x) & 1) == 0; } }; @@ -37,7 +37,7 @@ template struct is_true : thrust::unary_function { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T x) { return x ? 
true : false; } }; @@ -68,7 +68,7 @@ DECLARE_VECTOR_UNITTEST(TestRemoveSimple); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(my_system &system, ForwardIterator first, ForwardIterator, @@ -92,7 +92,7 @@ DECLARE_UNITTEST(TestRemoveDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove(my_tag, ForwardIterator first, ForwardIterator, @@ -146,7 +146,7 @@ DECLARE_VECTOR_UNITTEST(TestRemoveCopySimple); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(my_system &system, InputIterator, InputIterator, @@ -176,7 +176,7 @@ DECLARE_UNITTEST(TestRemoveCopyDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy(my_tag, InputIterator, InputIterator, @@ -228,7 +228,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestRemoveIfSimple); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(my_system &system, ForwardIterator first, ForwardIterator, @@ -252,7 +252,7 @@ DECLARE_UNITTEST(TestRemoveIfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(my_tag, ForwardIterator first, ForwardIterator, @@ -311,7 +311,7 @@ DECLARE_VECTOR_UNITTEST(TestRemoveIfStencilSimple); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(my_system &system, ForwardIterator first, ForwardIterator, @@ -341,7 +341,7 @@ DECLARE_UNITTEST(TestRemoveIfStencilDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator remove_if(my_tag, ForwardIterator first, ForwardIterator, @@ -397,7 +397,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestRemoveCopyIfSimple); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator remove_copy_if(my_system &system, InputIterator first, InputIterator, @@ -427,7 +427,7 @@ DECLARE_UNITTEST(TestRemoveCopyIfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE InputIterator remove_copy_if(my_tag, InputIterator first, InputIterator, @@ -492,7 
+492,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(my_system &system, InputIterator1, InputIterator1, @@ -525,7 +525,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator remove_copy_if(my_tag, InputIterator1, InputIterator1, diff --git a/testing/replace.cu b/testing/replace.cu index 380a1f79a..4b5eb503f 100644 --- a/testing/replace.cu +++ b/testing/replace.cu @@ -49,7 +49,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceSimple); template -__host__ __device__ +THRUST_HOST_DEVICE void replace(my_system &system, ForwardIterator, ForwardIterator, const T &, const T &) @@ -74,7 +74,7 @@ DECLARE_UNITTEST(TestReplaceDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void replace(my_tag, ForwardIterator first, ForwardIterator, const T &, const T &) @@ -143,7 +143,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceCopySimple); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(my_system &system, InputIterator, InputIterator, OutputIterator result, @@ -172,7 +172,7 @@ DECLARE_UNITTEST(TestReplaceCopyDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy(my_tag, InputIterator, InputIterator, OutputIterator result, @@ -246,7 +246,7 @@ DECLARE_VARIABLE_UNITTEST(TestReplaceCopyToDiscardIterator); template struct less_than_five { - __host__ __device__ bool operator()(const T &val) const {return val < 5;} + THRUST_HOST_DEVICE bool operator()(const T &val) const {return val < 5;} }; template @@ -276,7 +276,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceIfSimple); template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(my_system &system, ForwardIterator, ForwardIterator, Predicate, @@ -302,7 +302,7 @@ DECLARE_UNITTEST(TestReplaceIfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(my_tag, ForwardIterator first, ForwardIterator, Predicate, @@ -359,7 +359,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceIfStencilSimple); template 
-__host__ __device__ +THRUST_HOST_DEVICE void replace_if(my_system &system, ForwardIterator, ForwardIterator, InputIterator, @@ -387,7 +387,7 @@ DECLARE_UNITTEST(TestReplaceIfStencilDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void replace_if(my_tag, ForwardIterator first, ForwardIterator, InputIterator, @@ -472,7 +472,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceCopyIfSimple); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(my_system &system, InputIterator, InputIterator, OutputIterator result, @@ -501,7 +501,7 @@ DECLARE_UNITTEST(TestReplaceCopyIfDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(my_tag, InputIterator, InputIterator, OutputIterator result, @@ -563,7 +563,7 @@ DECLARE_VECTOR_UNITTEST(TestReplaceCopyIfStencilSimple); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(my_system &system, InputIterator1, InputIterator1, InputIterator2, @@ -595,7 +595,7 @@ DECLARE_UNITTEST(TestReplaceCopyIfStencilDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator replace_copy_if(my_tag, InputIterator1, InputIterator1, InputIterator2, diff --git a/testing/scan.cu b/testing/scan.cu index c8134e5da..8fe3f3520 100644 --- a/testing/scan.cu +++ b/testing/scan.cu @@ -1,6 +1,6 @@ /* * Copyright 2008-2013 NVIDIA Corporation - * Modifications Copyright© 2019 Advanced Micro Devices, Inc. All rights reserved. + * Modifications Copyright© 2019-2024 Advanced Micro Devices, Inc. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,7 +31,7 @@ template struct max_functor { - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T rhs, T lhs) const { return thrust::max(rhs,lhs); @@ -549,7 +549,7 @@ struct plus_mod3 plus_mod3(T * table) : table(table) {} - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T a, T b) { return table[(int) (a + b)]; @@ -598,7 +598,7 @@ struct const_ref_plus_mod3 const_ref_plus_mod3(T * table) : table(table) {} - __host__ __device__ + THRUST_HOST_DEVICE const T& operator()(T a, T b) { return table[(int) (a + b)]; @@ -645,14 +645,14 @@ struct only_set_when_expected_it long long expected; bool * flag; - __host__ __device__ only_set_when_expected_it operator++() const { return *this; } - __host__ __device__ only_set_when_expected_it operator*() const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator++() const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator*() const { return *this; } template - __host__ __device__ only_set_when_expected_it operator+(Difference) const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator+(Difference) const { return *this; } template - __host__ __device__ only_set_when_expected_it operator[](Index) const { return *this; } + THRUST_HOST_DEVICE only_set_when_expected_it operator[](Index) const { return *this; } - __device__ + THRUST_DEVICE void operator=(long long value) const { if (value == expected) @@ -733,9 +733,9 @@ DECLARE_UNITTEST(TestExclusiveScanWithBigIndexes); struct Int { int i{}; - __host__ __device__ explicit Int(int num) : i(num) {} - __host__ __device__ Int() : i{} {} - __host__ __device__ Int operator+(Int const& o) const { return Int{this->i + o.i}; } + THRUST_HOST_DEVICE explicit Int(int num) : i(num) {} + THRUST_HOST_DEVICE Int() : i{} {} + THRUST_HOST_DEVICE Int operator+(Int const& o) const { return Int{this->i + o.i}; } }; void TestInclusiveScanWithUserDefinedType() diff --git a/testing/scan_by_key.exclusive.cu 
b/testing/scan_by_key.exclusive.cu index 58354d848..269b1e361 100644 --- a/testing/scan_by_key.exclusive.cu +++ b/testing/scan_by_key.exclusive.cu @@ -154,7 +154,7 @@ DECLARE_UNITTEST(TestExclusiveScanByKeyDispatchImplicit); struct head_flag_predicate { template - __host__ __device__ bool operator()(const T&, const T& b) + THRUST_HOST_DEVICE bool operator()(const T&, const T& b) { return b ? false : true; } diff --git a/testing/scan_by_key.inclusive.cu b/testing/scan_by_key.inclusive.cu index b2d2337e2..bce8f66c0 100644 --- a/testing/scan_by_key.inclusive.cu +++ b/testing/scan_by_key.inclusive.cu @@ -134,7 +134,7 @@ DECLARE_UNITTEST(TestInclusiveScanByKeyDispatchImplicit); struct head_flag_predicate { template - __host__ __device__ bool operator()(const T&, const T& b) + THRUST_HOST_DEVICE bool operator()(const T&, const T& b) { return b ? false : true; } diff --git a/testing/scatter.cu b/testing/scatter.cu index cd5318c6f..e81e2ff6e 100644 --- a/testing/scatter.cu +++ b/testing/scatter.cu @@ -52,7 +52,7 @@ DECLARE_INTEGRAL_VECTOR_UNITTEST(TestScatterSimple); template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(my_system &system, InputIterator1, InputIterator1, @@ -82,7 +82,7 @@ DECLARE_UNITTEST(TestScatterDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void scatter(my_tag, InputIterator1, InputIterator1, @@ -186,7 +186,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(my_system &system, InputIterator1, InputIterator1, @@ -218,7 +218,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void scatter_if(my_tag, InputIterator1, InputIterator1, @@ -248,7 +248,7 @@ template class is_even_scatter_if { public: - __host__ __device__ bool operator()(const T i) const { return (i % 2) == 0; } + THRUST_HOST_DEVICE bool operator()(const T i) const { return (i % 2) == 0; } }; template diff --git a/testing/sequence.cu b/testing/sequence.cu index 0b6d5b0de..ab66ad5f2 100644 --- a/testing/sequence.cu +++ b/testing/sequence.cu @@ 
-22,7 +22,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(my_system &system, ForwardIterator, ForwardIterator) { system.validate_dispatch(); @@ -41,7 +41,7 @@ DECLARE_UNITTEST(TestSequenceDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE void sequence(my_tag, ForwardIterator first, ForwardIterator) { *first = 13; @@ -150,7 +150,7 @@ struct Vector Vector() = default; // Explicitly disable construction from size_t Vector(std::size_t) = delete; - __host__ __device__ Vector(int x_, int y_) : x{x_}, y{y_} {} + THRUST_HOST_DEVICE Vector(int x_, int y_) : x{x_}, y{y_} {} Vector(const Vector&) = default; Vector &operator=(const Vector&) = default; @@ -158,18 +158,18 @@ struct Vector }; // Vector-Vector addition -__host__ __device__ Vector operator+(const Vector a, const Vector b) +THRUST_HOST_DEVICE Vector operator+(const Vector a, const Vector b) { return Vector{a.x + b.x, a.y + b.y}; } // Vector-Scalar Multiplication // Multiplication by std::size_t is required by thrust::sequence. 
-__host__ __device__ Vector operator*(const std::size_t a, const Vector b) +THRUST_HOST_DEVICE Vector operator*(const std::size_t a, const Vector b) { return Vector{static_cast(a) * b.x, static_cast(a) * b.y}; } -__host__ __device__ Vector operator*(const Vector b, const std::size_t a) +THRUST_HOST_DEVICE Vector operator*(const Vector b, const std::size_t a) { return Vector{static_cast(a) * b.x, static_cast(a) * b.y}; } diff --git a/testing/sort_permutation_iterator.cu b/testing/sort_permutation_iterator.cu index 33d6ac6e1..9cdf20269 100644 --- a/testing/sort_permutation_iterator.cu +++ b/testing/sort_permutation_iterator.cu @@ -20,7 +20,7 @@ class strided_range stride_functor(difference_type stride) : stride(stride) {} - __host__ __device__ + THRUST_HOST_DEVICE difference_type operator()(const difference_type& i) const { return stride * i; diff --git a/testing/stable_sort.cu b/testing/stable_sort.cu index b17f8debf..1d1e10de9 100644 --- a/testing/stable_sort.cu +++ b/testing/stable_sort.cu @@ -60,7 +60,7 @@ DECLARE_UNITTEST(TestStableSortDispatchImplicit); template struct less_div_10 { - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return ((int) lhs) / 10 < ((int) rhs) / 10;} + THRUST_HOST_DEVICE bool operator()(const T &lhs, const T &rhs) const {return ((int) lhs) / 10 < ((int) rhs) / 10;} }; template @@ -148,7 +148,7 @@ struct comp_mod3 comp_mod3(T * table) : table(table) {} - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T a, T b) { return table[(int) a] < table[(int) b]; diff --git a/testing/stable_sort_by_key.cu b/testing/stable_sort_by_key.cu index e3736542d..d77fed986 100644 --- a/testing/stable_sort_by_key.cu +++ b/testing/stable_sort_by_key.cu @@ -43,7 +43,7 @@ DECLARE_UNITTEST(TestStableSortByKeyDispatchImplicit); template struct less_div_10 { - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return ((int) lhs) / 10 < ((int) rhs) / 10;} + THRUST_HOST_DEVICE bool operator()(const T &lhs, const T 
&rhs) const {return ((int) lhs) / 10 < ((int) rhs) / 10;} }; diff --git a/testing/stable_sort_by_key_large_values.cu b/testing/stable_sort_by_key_large_values.cu index b37753973..56ba05d9f 100644 --- a/testing/stable_sort_by_key_large_values.cu +++ b/testing/stable_sort_by_key_large_values.cu @@ -6,7 +6,7 @@ template struct greater_div_10 { - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const + THRUST_HOST_DEVICE bool operator()(const T& lhs, const T& rhs) const { return ((int)lhs) / 10 > ((int)rhs) / 10; } diff --git a/testing/swap_ranges.cu b/testing/swap_ranges.cu index 843c66240..ff41fd93d 100644 --- a/testing/swap_ranges.cu +++ b/testing/swap_ranges.cu @@ -121,28 +121,27 @@ DECLARE_UNITTEST(TestSwapRangesForcedIterator); struct type_with_swap { - inline __host__ __device__ - type_with_swap() - : m_x(), m_swapped(false) + inline THRUST_HOST_DEVICE type_with_swap() + : m_x() + , m_swapped(false) {} - inline __host__ __device__ - type_with_swap(int x) - : m_x(x), m_swapped(false) + inline THRUST_HOST_DEVICE type_with_swap(int x) + : m_x(x) + , m_swapped(false) {} - inline __host__ __device__ - type_with_swap(int x, bool s) - : m_x(x), m_swapped(s) + inline THRUST_HOST_DEVICE type_with_swap(int x, bool s) + : m_x(x) + , m_swapped(s) {} - inline __host__ __device__ - type_with_swap(const type_with_swap &other) - : m_x(other.m_x), m_swapped(other.m_swapped) + inline THRUST_HOST_DEVICE type_with_swap(const type_with_swap& other) + : m_x(other.m_x) + , m_swapped(other.m_swapped) {} - inline __host__ __device__ - bool operator==(const type_with_swap &other) const + inline THRUST_HOST_DEVICE bool operator==(const type_with_swap& other) const { return m_x == other.m_x && m_swapped == other.m_swapped; } @@ -155,8 +154,7 @@ struct type_with_swap bool m_swapped; }; -inline __host__ __device__ -void swap(type_with_swap &a, type_with_swap &b) +inline THRUST_HOST_DEVICE void swap(type_with_swap& a, type_with_swap& b) { thrust::swap(a.m_x, b.m_x); a.m_swapped 
= true; diff --git a/testing/transform.cu b/testing/transform.cu index 53b941c49..f0299de49 100644 --- a/testing/transform.cu +++ b/testing/transform.cu @@ -57,7 +57,7 @@ DECLARE_VECTOR_UNITTEST(TestTransformUnarySimple); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(my_system &system, InputIterator, InputIterator, OutputIterator result, UnaryFunction) { system.validate_dispatch(); @@ -83,7 +83,7 @@ DECLARE_UNITTEST(TestTransformUnaryDispatchExplicit); template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(my_tag, InputIterator, InputIterator, OutputIterator result, UnaryFunction) { *result = 13; @@ -135,7 +135,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_system &system, InputIterator, InputIterator, @@ -168,7 +168,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_tag, InputIterator, InputIterator, @@ -230,7 +230,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_system &system, InputIterator1, InputIterator1, @@ -264,7 +264,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_tag, InputIterator1, InputIterator1, @@ -322,7 +322,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(my_system &system, InputIterator1, InputIterator1, InputIterator2, OutputIterator result, UnaryFunction) { system.validate_dispatch(); @@ -350,7 +350,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform(my_tag, InputIterator1, InputIterator1, InputIterator2, OutputIterator result, UnaryFunction) { *result = 13; @@ -415,7 +415,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_system &system, InputIterator1, InputIterator1, @@ -454,7 +454,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE ForwardIterator transform_if(my_tag, InputIterator1, InputIterator1, @@ -527,7 +527,7 @@ 
DECLARE_VARIABLE_UNITTEST(TestTransformUnaryToDiscardIterator); struct repeat2 { template - __host__ __device__ + THRUST_HOST_DEVICE thrust::pair operator()(T x) { return thrust::make_pair(x,x); @@ -575,7 +575,7 @@ DECLARE_VARIABLE_UNITTEST(TestTransformUnaryToDiscardIteratorZipped); struct is_positive { template - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(T &x) { return x > 0; @@ -870,7 +870,7 @@ struct plus_mod3 plus_mod3(T * table) : table(table) {} - __host__ __device__ + THRUST_HOST_DEVICE T operator()(T a, T b) { return table[(int) (a + b)]; diff --git a/testing/transform_reduce.cu b/testing/transform_reduce.cu index 76f83cc92..3f30e7e3e 100644 --- a/testing/transform_reduce.cu +++ b/testing/transform_reduce.cu @@ -27,7 +27,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(my_system &system, InputIterator, InputIterator, @@ -59,7 +59,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputType transform_reduce(my_tag, InputIterator first, InputIterator, diff --git a/testing/transform_scan.cu b/testing/transform_scan.cu index 4b3e5b3e5..cbe303e10 100644 --- a/testing/transform_scan.cu +++ b/testing/transform_scan.cu @@ -29,7 +29,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(my_system &system, InputIterator, InputIterator, @@ -62,7 +62,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_inclusive_scan(my_tag, InputIterator, InputIterator, @@ -94,7 +94,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(my_system &system, InputIterator, InputIterator, @@ -130,7 +130,7 @@ template -__host__ __device__ +THRUST_HOST_DEVICE OutputIterator transform_exclusive_scan(my_tag, InputIterator, InputIterator, @@ -231,7 +231,7 @@ struct Record { }; struct negate { - __host__ __device__ int operator()(Record const& record) const + THRUST_HOST_DEVICE int operator()(Record const& record) const { return - 
record.number; } diff --git a/testing/tuple.cu b/testing/tuple.cu index af57dbf47..e52a0ac25 100644 --- a/testing/tuple.cu +++ b/testing/tuple.cu @@ -342,15 +342,13 @@ SimpleUnitTest TestTupleComparisonInstance; template struct TestTupleTieFunctor { - __host__ __device__ - void clear(T *data) const + THRUST_HOST_DEVICE void clear(T* data) const { for(int i = 0; i < 10; ++i) data[i] = 13; } - __host__ __device__ - bool operator()() const + THRUST_HOST_DEVICE bool operator()() const { using namespace thrust; diff --git a/testing/tuple_algorithms.cu b/testing/tuple_algorithms.cu index 449fdc2f1..038d6dd6f 100644 --- a/testing/tuple_algorithms.cu +++ b/testing/tuple_algorithms.cu @@ -11,8 +11,7 @@ struct custom_square { template - __host__ __device__ - T operator()(T v) const + THRUST_HOST_DEVICE T operator()(T v) const { return v * v; } @@ -21,8 +20,7 @@ struct custom_square struct custom_square_inplace { template - __host__ __device__ - void operator()(T& v) const + THRUST_HOST_DEVICE void operator()(T& v) const { v *= v; } diff --git a/testing/tuple_reduce.cu b/testing/tuple_reduce.cu index 9edea4bda..2c9eb4667 100644 --- a/testing/tuple_reduce.cu +++ b/testing/tuple_reduce.cu @@ -8,7 +8,7 @@ using namespace unittest; struct SumTupleFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE Tuple operator()(const Tuple &lhs, const Tuple &rhs) { using thrust::get; @@ -21,7 +21,7 @@ struct SumTupleFunctor struct MakeTupleFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(T1 &lhs, T2 &rhs) { return thrust::make_tuple(lhs, rhs); diff --git a/testing/tuple_scan.cu b/testing/tuple_scan.cu index d0565d6d4..3ff2b8283 100644 --- a/testing/tuple_scan.cu +++ b/testing/tuple_scan.cu @@ -12,7 +12,7 @@ using namespace unittest; struct SumTupleFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE Tuple operator()(const Tuple &lhs, const Tuple &rhs) { using thrust::get; @@ -25,7 +25,7 @@ struct SumTupleFunctor struct MakeTupleFunctor { 
template - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(T1 &lhs, T2 &rhs) { return thrust::make_tuple(lhs, rhs); diff --git a/testing/tuple_sort.cu b/testing/tuple_sort.cu index db51c1183..071c5a2f3 100644 --- a/testing/tuple_sort.cu +++ b/testing/tuple_sort.cu @@ -8,7 +8,7 @@ using namespace unittest; struct MakeTupleFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(T1 &lhs, T2 &rhs) { return thrust::make_tuple(lhs, rhs); @@ -19,7 +19,7 @@ template struct GetFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::tuple_element::type operator()(const Tuple &t) { return thrust::get(t); diff --git a/testing/tuple_transform.cu b/testing/tuple_transform.cu index 7aef96aa1..ca16e08be 100644 --- a/testing/tuple_transform.cu +++ b/testing/tuple_transform.cu @@ -7,7 +7,7 @@ using namespace unittest; struct MakeTupleFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE thrust::tuple operator()(T1 &lhs, T2 &rhs) { return thrust::make_tuple(lhs, rhs); @@ -18,7 +18,7 @@ template struct GetFunctor { template - __host__ __device__ + THRUST_HOST_DEVICE typename thrust::tuple_element::type operator()(const Tuple &t) { return thrust::get(t); diff --git a/testing/uninitialized_copy.cu b/testing/uninitialized_copy.cu index 8eb26f4d8..c50387ede 100644 --- a/testing/uninitialized_copy.cu +++ b/testing/uninitialized_copy.cu @@ -156,13 +156,13 @@ DECLARE_VECTOR_UNITTEST(TestUninitializedCopyNSimplePOD); struct CopyConstructTest { - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest(void) :copy_constructed_on_host(false), copy_constructed_on_device(false) {} - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest(const CopyConstructTest &) { NV_IF_TARGET(NV_IS_DEVICE, ( @@ -174,7 +174,7 @@ struct CopyConstructTest )); } - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest &operator=(const CopyConstructTest &x) { copy_constructed_on_host = x.copy_constructed_on_host; diff --git 
a/testing/uninitialized_fill.cu b/testing/uninitialized_fill.cu index ea58c9b4e..62efaffd8 100644 --- a/testing/uninitialized_fill.cu +++ b/testing/uninitialized_fill.cu @@ -165,13 +165,13 @@ DECLARE_VECTOR_UNITTEST(TestUninitializedFillPOD); struct CopyConstructTest { - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest(void) :copy_constructed_on_host(false), copy_constructed_on_device(false) {} - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest(const CopyConstructTest &) { NV_IF_TARGET(NV_IS_DEVICE, ( @@ -183,7 +183,7 @@ struct CopyConstructTest )); } - __host__ __device__ + THRUST_HOST_DEVICE CopyConstructTest &operator=(const CopyConstructTest &x) { copy_constructed_on_host = x.copy_constructed_on_host; diff --git a/testing/unique.cu b/testing/unique.cu index 7df2def87..b04c9678f 100644 --- a/testing/unique.cu +++ b/testing/unique.cu @@ -142,7 +142,7 @@ DECLARE_UNITTEST(TestUniqueCountDispatchImplicit); template struct is_equal_div_10_unique { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } }; diff --git a/testing/unique_by_key.cu b/testing/unique_by_key.cu index ec17ef63f..0aad37b4b 100644 --- a/testing/unique_by_key.cu +++ b/testing/unique_by_key.cu @@ -8,7 +8,7 @@ template struct index_to_value_t { template - __host__ __device__ __forceinline__ ValueT operator()(IndexT index) + THRUST_HOST_DEVICE __forceinline__ ValueT operator()(IndexT index) { if (static_cast(index) == 4300000000ULL) { @@ -137,7 +137,7 @@ DECLARE_UNITTEST(TestUniqueByKeyCopyDispatchImplicit); template struct is_equal_div_10_unique { - __host__ __device__ + THRUST_HOST_DEVICE bool operator()(const T x, const T& y) const { return ((int) x / 10) == ((int) y / 10); } }; diff --git a/testing/unittest/runtime_static_assert.h b/testing/unittest/runtime_static_assert.h index 1f1c5a4e9..25e28829f 100644 --- a/testing/unittest/runtime_static_assert.h +++ b/testing/unittest/runtime_static_assert.h 
@@ -11,7 +11,7 @@ namespace unittest { - __host__ __device__ + THRUST_HOST_DEVICE void assert_static(bool condition, const char * filename, int lineno); } @@ -84,12 +84,12 @@ namespace unittest class static_assert_exception { public: - __host__ __device__ + THRUST_HOST_DEVICE static_assert_exception() : triggered(false) { } - __host__ __device__ + THRUST_HOST_DEVICE static_assert_exception(const char * filename, int lineno) : triggered(true), filename(filename), lineno(lineno) { @@ -106,10 +106,10 @@ namespace unittest THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_CLANG __attribute__((used)) #endif - __device__ static static_assert_exception* device_exception = NULL; + THRUST_DEVICE static static_assert_exception* device_exception = NULL; } - __host__ __device__ + THRUST_HOST_DEVICE void assert_static(bool condition, const char * filename, int lineno) { if (!condition) diff --git a/testing/unittest/special_types.h b/testing/unittest/special_types.h index d47308f57..8edb24d87 100644 --- a/testing/unittest/special_types.h +++ b/testing/unittest/special_types.h @@ -25,21 +25,21 @@ struct FixedVector { T data[N]; - __host__ __device__ + THRUST_HOST_DEVICE FixedVector() { for(unsigned int i = 0; i < N; i++) data[i] = T(); } - __host__ __device__ + THRUST_HOST_DEVICE FixedVector(T init) { for(unsigned int i = 0; i < N; i++) data[i] = init; } - __host__ __device__ + THRUST_HOST_DEVICE FixedVector operator+(const FixedVector& bs) const { FixedVector output; @@ -48,7 +48,7 @@ struct FixedVector return output; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator<(const FixedVector& bs) const { for(unsigned int i = 0; i < N; i++) @@ -61,7 +61,7 @@ struct FixedVector return false; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const FixedVector& bs) const { for(unsigned int i = 0; i < N; i++) @@ -79,35 +79,35 @@ template typedef Key key_type; typedef Value value_type; - __host__ __device__ + THRUST_HOST_DEVICE key_value(void) : key(), value() {} - __host__ 
__device__ + THRUST_HOST_DEVICE key_value(key_type k, value_type v) : key(k), value(v) {} - __host__ __device__ + THRUST_HOST_DEVICE bool operator<(const key_value &rhs) const { return key < rhs.key; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator>(const key_value &rhs) const { return key > rhs.key; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const key_value &rhs) const { return key == rhs.key && value == rhs.value; } - __host__ __device__ + THRUST_HOST_DEVICE bool operator!=(const key_value &rhs) const { return !operator==(rhs); @@ -124,7 +124,7 @@ template struct user_swappable { - inline __host__ __device__ + inline THRUST_HOST_DEVICE user_swappable(bool swapped = false) : was_swapped(swapped) {} @@ -132,13 +132,13 @@ struct user_swappable bool was_swapped; }; -inline __host__ __device__ +inline THRUST_HOST_DEVICE bool operator==(const user_swappable &x, const user_swappable &y) { return x.was_swapped == y.was_swapped; } -inline __host__ __device__ +inline THRUST_HOST_DEVICE void swap(user_swappable &x, user_swappable &y) { x.was_swapped = true; diff --git a/testing/unittest/testframework.h b/testing/unittest/testframework.h index ddd7ca86d..55271d196 100644 --- a/testing/unittest/testframework.h +++ b/testing/unittest/testframework.h @@ -91,7 +91,7 @@ typedef unittest::type_list::value>::type> - __host__ __device__ + THRUST_HOST_DEVICE custom_numeric(const T& i) { fill(static_cast(i)); } - __host__ __device__ + THRUST_HOST_DEVICE custom_numeric(const custom_numeric & other) { fill(other.value[0]); } - __host__ __device__ + THRUST_HOST_DEVICE custom_numeric & operator=(int val) { fill(val); return *this; } - __host__ __device__ + THRUST_HOST_DEVICE custom_numeric & operator=(const custom_numeric & other) { fill(other.value[0]); @@ -128,7 +128,7 @@ class custom_numeric // cast to void * instead of bool to fool overload resolution // WTB C++11 explicit conversion operators - __host__ __device__ + THRUST_HOST_DEVICE operator void *() 
const { // static cast first to avoid MSVC warning C4312 @@ -136,12 +136,12 @@ class custom_numeric } #define DEFINE_OPERATOR(op) \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ custom_numeric & operator op() { \ fill(op value[0]); \ return *this; \ } \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ custom_numeric operator op(int) const { \ custom_numeric ret(*this); \ op ret; \ @@ -154,7 +154,7 @@ class custom_numeric #undef DEFINE_OPERATOR #define DEFINE_OPERATOR(op) \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ custom_numeric operator op () const \ { \ return custom_numeric(op value[0]); \ @@ -167,7 +167,7 @@ class custom_numeric #undef DEFINE_OPERATOR #define DEFINE_OPERATOR(op) \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ custom_numeric operator op (const custom_numeric & other) const \ { \ return custom_numeric(value[0] op other.value[0]); \ @@ -189,7 +189,7 @@ class custom_numeric #define CONCAT(X, Y) X ## Y #define DEFINE_OPERATOR(op) \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ custom_numeric & operator CONCAT(op, =) (const custom_numeric & other) \ { \ fill(value[0] op other.value[0]); \ @@ -210,7 +210,7 @@ class custom_numeric #undef DEFINE_OPERATOR #define DEFINE_OPERATOR(op) \ - __host__ __device__ \ + THRUST_HOST_DEVICE \ friend bool operator op (const custom_numeric & lhs, const custom_numeric & rhs) \ { \ return lhs.value[0] op rhs.value[0]; \ @@ -236,7 +236,7 @@ class custom_numeric private: int value[5]; - __host__ __device__ + THRUST_HOST_DEVICE void fill(int val) { for (int i = 0; i < 5; ++i) diff --git a/testing/unittest/util_async.h b/testing/unittest/util_async.h index 9a3454efd..215a85c3e 100644 --- a/testing/unittest/util_async.h +++ b/testing/unittest/util_async.h @@ -21,10 +21,7 @@ namespace unittest { template -__host__ -void test_event_wait( - Event&& e, std::string const& filename = "unknown", int lineno = -1 -) +THRUST_HOST void test_event_wait(Event&& e, std::string const& filename = "unknown", int lineno = -1) { 
ASSERT_EQUAL_WITH_FILE_AND_LINE(true, e.valid_stream(), filename, lineno); @@ -36,10 +33,8 @@ void test_event_wait( } template -__host__ -auto test_future_value_retrieval( - Future&& f, std::string const& filename = "unknown", int lineno = -1 -) -> decltype(f.extract()) +THRUST_HOST auto test_future_value_retrieval(Future&& f, std::string const& filename = "unknown", int lineno = -1) + -> decltype(f.extract()) { ASSERT_EQUAL_WITH_FILE_AND_LINE(true, f.valid_stream(), filename, lineno); ASSERT_EQUAL_WITH_FILE_AND_LINE(true, f.valid_content(), filename, lineno); diff --git a/testing/unittest_static_assert.cu b/testing/unittest_static_assert.cu index 7ed0d5658..9641ecf0a 100644 --- a/testing/unittest_static_assert.cu +++ b/testing/unittest_static_assert.cu @@ -11,7 +11,7 @@ struct dependent_false template struct static_assertion { - __host__ __device__ + THRUST_HOST_DEVICE T operator()() const { THRUST_STATIC_ASSERT(dependent_false::value); diff --git a/testing/vector.cu b/testing/vector.cu index e4ca5cb74..4aad55c95 100644 --- a/testing/vector.cu +++ b/testing/vector.cu @@ -731,7 +731,7 @@ struct LargeStruct { int data[N]; - __host__ __device__ + THRUST_HOST_DEVICE bool operator==(const LargeStruct & ls) const { for (int i = 0; i < N; i++) diff --git a/testing/zip_function.cu b/testing/zip_function.cu index 16332fb77..c5c5c8d47 100644 --- a/testing/zip_function.cu +++ b/testing/zip_function.cu @@ -18,13 +18,13 @@ using namespace unittest; struct SumThree { template - __host__ __device__ auto operator()(T1 x, T2 y, T3 z) const THRUST_DECLTYPE_RETURNS(x + y + z) + THRUST_HOST_DEVICE auto operator()(T1 x, T2 y, T3 z) const THRUST_DECLTYPE_RETURNS(x + y + z) }; // end SumThree struct SumThreeTuple { template - __host__ __device__ auto operator()(Tuple x) const + THRUST_HOST_DEVICE auto operator()(Tuple x) const THRUST_DECLTYPE_RETURNS(thrust::get<0>(x) + thrust::get<1>(x) + thrust::get<2>(x)) }; // end SumThreeTuple diff --git a/testing/zip_iterator.cu 
b/testing/zip_iterator.cu index 9d103df0f..e7f009f8d 100644 --- a/testing/zip_iterator.cu +++ b/testing/zip_iterator.cu @@ -298,10 +298,9 @@ DECLARE_VECTOR_UNITTEST(TestZipIteratorCopy); struct SumTwoTuple { - template - __host__ __device__ - typename thrust::detail::remove_reference::type>::type - operator()(Tuple x) const + template + THRUST_HOST_DEVICE typename thrust::detail::remove_reference::type>::type + operator()(Tuple x) const { return thrust::get<0>(x) + thrust::get<1>(x); } @@ -309,10 +308,9 @@ struct SumTwoTuple struct SumThreeTuple { - template - __host__ __device__ - typename thrust::detail::remove_reference::type>::type - operator()(Tuple x) const + template + THRUST_HOST_DEVICE typename thrust::detail::remove_reference::type>::type + operator()(Tuple x) const { return thrust::get<0>(x) + thrust::get<1>(x) + thrust::get<2>(x); } diff --git a/testing/zip_iterator_reduce.cu b/testing/zip_iterator_reduce.cu index c1ad037dd..458c11bc3 100644 --- a/testing/zip_iterator_reduce.cu +++ b/testing/zip_iterator_reduce.cu @@ -7,7 +7,7 @@ using namespace unittest; template struct TuplePlus { - __host__ __device__ + THRUST_HOST_DEVICE Tuple operator()(Tuple x, Tuple y) const { using namespace thrust; diff --git a/testing/zip_iterator_reduce_by_key.cu b/testing/zip_iterator_reduce_by_key.cu index 9076bcba5..cc8dbe423 100644 --- a/testing/zip_iterator_reduce_by_key.cu +++ b/testing/zip_iterator_reduce_by_key.cu @@ -11,8 +11,7 @@ using namespace unittest; template struct TuplePlus { - __host__ __device__ - Tuple operator()(Tuple x, Tuple y) const + THRUST_HOST_DEVICE Tuple operator()(Tuple x, Tuple y) const { using namespace thrust; return make_tuple(get<0>(x) + get<0>(y), diff --git a/testing/zip_iterator_scan.cu b/testing/zip_iterator_scan.cu index 96ace6d76..b545c5997 100644 --- a/testing/zip_iterator_scan.cu +++ b/testing/zip_iterator_scan.cu @@ -12,7 +12,7 @@ using namespace unittest; template struct TuplePlus { - __host__ __device__ + THRUST_HOST_DEVICE Tuple 
operator()(Tuple x, Tuple y) const { using namespace thrust; diff --git a/thrust/complex.h b/thrust/complex.h index f95cace86..fd24430ad 100644 --- a/thrust/complex.h +++ b/thrust/complex.h @@ -37,10 +37,8 @@ const typename thrust::detail::remove_reference::type::value_type (&)[2] \ >(z)[0] # define THRUST_STD_COMPLEX_IMAG(z) \ - reinterpret_cast< \ - const typename thrust::detail::remove_reference::type::value_type (&)[2] \ - >(z)[1] -# define THRUST_STD_COMPLEX_DEVICE __device__ + reinterpret_cast::type::value_type(&)[2]>(z)[1] +# define THRUST_STD_COMPLEX_DEVICE THRUST_DEVICE #else # define THRUST_STD_COMPLEX_REAL(z) (z).real() # define THRUST_STD_COMPLEX_IMAG(z) (z).imag() @@ -223,8 +221,7 @@ struct complex * * \param z The \p complex to copy from. */ - __host__ THRUST_STD_COMPLEX_DEVICE - complex(const std::complex& z); + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); /*! This converting copy constructor copies from a std::complex with * a type that is convertible to this \p complex's \c value_type. @@ -234,10 +231,7 @@ struct complex * \tparam U is convertible to \c value_type. */ template - __host__ THRUST_STD_COMPLEX_DEVICE - complex(const std::complex& z); - - + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex(const std::complex& z); /* --- Assignment Operators --- */ @@ -282,8 +276,7 @@ struct complex * * \param z The \p complex to copy from. */ - __host__ THRUST_STD_COMPLEX_DEVICE - complex& operator=(const std::complex& z); + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); /*! Assign `z.real()` and `z.imag()` to the real and imaginary parts of this * \p complex respectively. @@ -293,9 +286,7 @@ struct complex * \tparam U is convertible to \c value_type. 
*/ template - __host__ THRUST_STD_COMPLEX_DEVICE - complex& operator=(const std::complex& z); - + THRUST_HOST THRUST_STD_COMPLEX_DEVICE complex& operator=(const std::complex& z); /* --- Compound Assignment Operators --- */ @@ -455,8 +446,10 @@ struct complex /*! Casts this \p complex to a std::complex of the same type. */ - __host__ - operator std::complex() const { return std::complex(real(), imag()); } + THRUST_HOST operator std::complex() const + { + return std::complex(real(), imag()); + } private: typename detail::complex_storage::type data; @@ -932,11 +925,7 @@ operator<<(std::basic_ostream& os, const complex& z); * \param z The \p complex number to set. */ template -__host__ -std::basic_istream& -operator>>(std::basic_istream& is, complex& z); - - +THRUST_HOST std::basic_istream& operator>>(std::basic_istream& is, complex& z); /* --- Equality Operators --- */ @@ -955,8 +944,7 @@ bool operator==(const complex& x, const complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE -bool operator==(const complex& x, const std::complex& y); +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are equal and false otherwise. * @@ -964,8 +952,7 @@ bool operator==(const complex& x, const std::complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE -bool operator==(const std::complex& x, const complex& y); +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator==(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is zero and * the real part is equal to the scalar. Returns false otherwise. @@ -1002,8 +989,7 @@ bool operator!=(const complex& x, const complex& y); * \param y The second \p complex. 
*/ template -__host__ THRUST_STD_COMPLEX_DEVICE -bool operator!=(const complex& x, const std::complex& y); +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const complex& x, const std::complex& y); /*! Returns true if two \p complex numbers are different and false otherwise. * @@ -1011,8 +997,7 @@ bool operator!=(const complex& x, const std::complex& y); * \param y The second \p complex. */ template -__host__ THRUST_STD_COMPLEX_DEVICE -bool operator!=(const std::complex& x, const complex& y); +THRUST_HOST THRUST_STD_COMPLEX_DEVICE bool operator!=(const std::complex& x, const complex& y); /*! Returns true if the imaginary part of the \p complex number is not zero or * the real part is different from the scalar. Returns false otherwise. diff --git a/thrust/detail/config/execution_space.h b/thrust/detail/config/execution_space.h index d691907fe..240869011 100644 --- a/thrust/detail/config/execution_space.h +++ b/thrust/detail/config/execution_space.h @@ -26,6 +26,7 @@ #define THRUST_FORCEINLINE __forceinline__ #endif +#if !defined(__HIP__) #if !defined(THRUST_EXEC_CHECK_DISABLE) # if defined(_CCCL_CUDA_COMPILER_NVCC) # if defined(_CCCL_COMPILER_MSVC) @@ -37,5 +38,8 @@ # define THRUST_EXEC_CHECK_DISABLE # endif // _CCCL_CUDA_COMPILER_NVCC #endif // !THRUST_EXEC_CHECK_DISABLE +#else +#define THRUST_EXEC_CHECK_DISABLE +#endif // !HIP #endif // THRUST_DETAIL_CONFIG_EXECUTION_SPACE_H \ No newline at end of file From 15a07b028294aa0dfbd586ff85849998ad6cae06 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 31 Jul 2024 14:15:05 +0000 Subject: [PATCH 25/44] Updated CHANGELOG --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d5e12acdb..046ce2c53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ Documentation for rocThrust available at [https://rocm.docs.amd.com/projects/rocThrust/en/latest/](https://rocm.docs.amd.com/projects/rocThrust/en/latest/). 
+## (Unreleased) rocThrust 3.3.0 for ROCm 6.4 + +### Additions + +* Merged changes from upstream CCCL/thrust 2.4.0 + ## (Unreleased) rocThrust 3.2.0 for ROCm 6.3 ### Additions From 158a1e117ecf7db43339d61aa7a637615528732c Mon Sep 17 00:00:00 2001 From: Nick Breed Date: Thu, 1 Aug 2024 08:01:20 +0000 Subject: [PATCH 26/44] Added operator to transform_reduce benchmark --- benchmarks/bench/transform_reduce/sum.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/bench/transform_reduce/sum.cu b/benchmarks/bench/transform_reduce/sum.cu index b4f3c6c90..38f266446 100644 --- a/benchmarks/bench/transform_reduce/sum.cu +++ b/benchmarks/bench/transform_reduce/sum.cu @@ -44,7 +44,8 @@ struct sum bench_utils::gpu_timer d_timer; d_timer.start(0); - bench_utils::do_not_optimize(thrust::reduce(Policy {}, input.begin(), input.end())); + bench_utils::do_not_optimize( + thrust::reduce(Policy {}, input.begin(), input.end(), T {}, thrust::plus {})); d_timer.stop(0); return d_timer.get_duration(); From d0bf50feda7bd16aee6fa2ef7a1b8cbb598030ae Mon Sep 17 00:00:00 2001 From: Nick Breed Date: Thu, 1 Aug 2024 13:22:53 +0000 Subject: [PATCH 27/44] Added mem allocator in benchmarks --- benchmarks/bench/adjacent_difference/basic.cu | 11 ++-- .../bench/adjacent_difference/custom.cu | 11 ++-- .../bench/adjacent_difference/in_place.cu | 11 ++-- benchmarks/bench/copy/basic.cu | 11 ++-- benchmarks/bench/copy/if.cu | 15 +++-- benchmarks/bench/fill/basic.cu | 11 ++-- benchmarks/bench/for_each/basic.cu | 11 ++-- benchmarks/bench/inner_product/basic.cu | 11 ++-- benchmarks/bench/merge/basic.cu | 13 ++-- benchmarks/bench/partition/basic.cu | 13 ++-- benchmarks/bench/reduce/basic.cu | 11 ++-- benchmarks/bench/reduce/by_key.cu | 12 ++-- benchmarks/bench/scan/exclusive/by_key.cu | 14 ++-- benchmarks/bench/scan/exclusive/max.cu | 12 ++-- benchmarks/bench/scan/exclusive/sum.cu | 11 ++-- benchmarks/bench/scan/inclusive/by_key.cu | 14 ++-- benchmarks/bench/scan/inclusive/max.cu | 
11 ++-- benchmarks/bench/scan/inclusive/sum.cu | 9 ++- benchmarks/bench/set_operations/base.hpp | 13 ++-- benchmarks/bench/set_operations/by_key.hpp | 15 +++-- benchmarks/bench/shuffle/basic.cu | 11 ++-- benchmarks/bench/sort/keys.cu | 11 ++-- benchmarks/bench/sort/keys_custom.cu | 11 ++-- benchmarks/bench/sort/pairs.cu | 12 ++-- benchmarks/bench/sort/pairs_custom.cu | 15 +++-- benchmarks/bench/tabulate/basic.cu | 11 ++-- benchmarks/bench/transform/basic.cu | 13 ++-- benchmarks/bench/transform_reduce/sum.cu | 11 ++-- benchmarks/bench/unique/basic.cu | 11 ++-- benchmarks/bench/unique/by_key.cu | 12 ++-- benchmarks/bench/vectorized_search/basic.cu | 10 ++- .../bench/vectorized_search/lower_bound.cu | 12 ++-- .../bench/vectorized_search/upper_bound.cu | 12 ++-- benchmarks/bench_utils/bench_utils.hpp | 65 +++++++++++++++++++ 34 files changed, 318 insertions(+), 139 deletions(-) diff --git a/benchmarks/bench/adjacent_difference/basic.cu b/benchmarks/bench/adjacent_difference/basic.cu index 6e50295a4..189ec34b2 100644 --- a/benchmarks/bench/adjacent_difference/basic.cu +++ b/benchmarks/bench/adjacent_difference/basic.cu @@ -38,13 +38,13 @@ struct basic { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::adjacent_difference(Policy {}, input.cbegin(), input.cend(), output.begin()); + thrust::adjacent_difference(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -66,9 +66,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template 
run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/adjacent_difference/custom.cu b/benchmarks/bench/adjacent_difference/custom.cu index 53619c2c8..b5c41003c 100644 --- a/benchmarks/bench/adjacent_difference/custom.cu +++ b/benchmarks/bench/adjacent_difference/custom.cu @@ -57,14 +57,14 @@ struct custom_op template struct custom { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); thrust::adjacent_difference( - Policy {}, input.cbegin(), input.cend(), output.begin(), custom_op {Val}); + policy, input.cbegin(), input.cend(), output.begin(), custom_op {Val}); d_timer.stop(0); return d_timer.get_duration(); @@ -86,9 +86,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/adjacent_difference/in_place.cu b/benchmarks/bench/adjacent_difference/in_place.cu index 265081c4b..75c8fd44f 100644 --- a/benchmarks/bench/adjacent_difference/in_place.cu +++ b/benchmarks/bench/adjacent_difference/in_place.cu @@ -38,13 +38,13 @@ struct in_place { - template - float64_t run(thrust::device_vector& input) + template + float64_t run(thrust::device_vector& input, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::adjacent_difference(Policy {}, input.cbegin(), input.cend(), input.begin()); + thrust::adjacent_difference(policy, input.cbegin(), input.cend(), input.begin()); 
d_timer.stop(0); return d_timer.get_duration(); @@ -63,9 +63,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Generate input thrust::device_vector input = bench_utils::generate(elements, seed_type); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input); + float64_t duration = benchmark.template run(input, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/copy/basic.cu b/benchmarks/bench/copy/basic.cu index 51303131e..bf278adda 100644 --- a/benchmarks/bench/copy/basic.cu +++ b/benchmarks/bench/copy/basic.cu @@ -38,13 +38,13 @@ struct basic { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::copy(Policy {}, input.cbegin(), input.cend(), output.begin()); + thrust::copy(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -68,9 +68,12 @@ void run_benchmark(benchmark::State& state, // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/copy/if.cu b/benchmarks/bench/copy/if.cu index 4ae222a68..9436cf07f 100644 --- a/benchmarks/bench/copy/if.cu +++ b/benchmarks/bench/copy/if.cu @@ -50,14 +50,16 @@ struct less_then_t struct _if { - template - float64_t - run(thrust::device_vector& input, thrust::device_vector& output, less_then_t select_op) + template + float64_t 
run(thrust::device_vector& input, + thrust::device_vector& output, + less_then_t select_op, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::copy_if(Policy {}, input.cbegin(), input.cend(), output.begin(), select_op); + thrust::copy_if(policy, input.cbegin(), input.cend(), output.begin(), select_op); d_timer.stop(0); return d_timer.get_duration(); @@ -88,9 +90,12 @@ void run_benchmark(benchmark::State& state, // Output thrust::device_vector output(selected_elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, select_op); + float64_t duration = benchmark.template run(input, output, select_op, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/fill/basic.cu b/benchmarks/bench/fill/basic.cu index 7cc6d5408..8568431ca 100644 --- a/benchmarks/bench/fill/basic.cu +++ b/benchmarks/bench/fill/basic.cu @@ -39,13 +39,13 @@ template struct basic { - template - float64_t run(thrust::device_vector& output) + template + float64_t run(thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::fill(Policy {}, output.begin(), output.end(), T {Val}); + thrust::fill(policy, output.begin(), output.end(), T {Val}); d_timer.stop(0); return d_timer.get_duration(); @@ -66,9 +66,12 @@ void run_benchmark(benchmark::State& state, // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(output); + float64_t duration = benchmark.template run(output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/for_each/basic.cu b/benchmarks/bench/for_each/basic.cu index cac4963be..9fc6afa84 100644 --- a/benchmarks/bench/for_each/basic.cu 
+++ b/benchmarks/bench/for_each/basic.cu @@ -47,13 +47,13 @@ struct square_t struct basic { - template - float64_t run(thrust::device_vector& input, square_t op) + template + float64_t run(thrust::device_vector& input, square_t op, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::for_each(Policy {}, input.begin(), input.end(), op); + thrust::for_each(policy, input.begin(), input.end(), op); d_timer.stop(0); return d_timer.get_duration(); @@ -74,9 +74,12 @@ void run_benchmark(benchmark::State& state, // Generate input thrust::device_vector input(elements, 1); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, square_t {}); + float64_t duration = benchmark.template run(input, square_t {}, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/inner_product/basic.cu b/benchmarks/bench/inner_product/basic.cu index 15f52ce5f..cac9a4bee 100644 --- a/benchmarks/bench/inner_product/basic.cu +++ b/benchmarks/bench/inner_product/basic.cu @@ -38,13 +38,13 @@ struct basic { - template - float64_t run(thrust::device_vector& lhs, thrust::device_vector& rhs) + template + float64_t run(thrust::device_vector& lhs, thrust::device_vector& rhs, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::inner_product(Policy {}, lhs.cbegin(), lhs.cend(), rhs.begin(), T {0}); + thrust::inner_product(policy, lhs.cbegin(), lhs.cend(), rhs.begin(), T {0}); d_timer.stop(0); return d_timer.get_duration(); @@ -65,9 +65,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st thrust::device_vector lhs = generator; thrust::device_vector rhs = generator; + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(lhs, rhs); + float64_t duration = benchmark.template 
run(lhs, rhs, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/merge/basic.cu b/benchmarks/bench/merge/basic.cu index 5b4e3cd32..d128cc610 100644 --- a/benchmarks/bench/merge/basic.cu +++ b/benchmarks/bench/merge/basic.cu @@ -39,15 +39,16 @@ struct basic { - template + template float64_t run(thrust::device_vector& input, thrust::device_vector& output, - const std::size_t elements_in_lhs) + const std::size_t elements_in_lhs, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::merge(Policy {}, + thrust::merge(policy, input.cbegin(), input.cbegin() + elements_in_lhs, input.cbegin() + elements_in_lhs, @@ -85,9 +86,13 @@ void run_benchmark(benchmark::State& state, // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, elements_in_lhs); + float64_t duration + = benchmark.template run(input, output, elements_in_lhs, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/partition/basic.cu b/benchmarks/bench/partition/basic.cu index 30aabbf8c..91eaf7332 100644 --- a/benchmarks/bench/partition/basic.cu +++ b/benchmarks/bench/partition/basic.cu @@ -51,16 +51,17 @@ struct less_then_t struct basic { - template + template float64_t run(thrust::device_vector& input, thrust::device_vector& output, const std::size_t elements, - less_then_t select_op) + less_then_t select_op, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::copy_if(Policy {}, + thrust::copy_if(policy, input.cbegin(), input.cend(), output.begin(), @@ -94,9 +95,13 @@ void run_benchmark(benchmark::State& state, // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t 
duration = benchmark.template run(input, output, elements, select_op); + float64_t duration + = benchmark.template run(input, output, elements, select_op, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/reduce/basic.cu b/benchmarks/bench/reduce/basic.cu index 5b4c9e9f2..e57d438d9 100644 --- a/benchmarks/bench/reduce/basic.cu +++ b/benchmarks/bench/reduce/basic.cu @@ -38,13 +38,13 @@ struct basic { - template - float64_t run(thrust::device_vector& input) + template + float64_t run(thrust::device_vector& input, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::reduce(Policy {}, input.begin(), input.end()); + thrust::reduce(policy, input.begin(), input.end()); d_timer.stop(0); return d_timer.get_duration(); @@ -63,9 +63,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Generate input thrust::device_vector input = bench_utils::generate(elements, seed_type); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input); + float64_t duration = benchmark.template run(input, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/reduce/by_key.cu b/benchmarks/bench/reduce/by_key.cu index 64d7fcd14..acaf1e7e4 100644 --- a/benchmarks/bench/reduce/by_key.cu +++ b/benchmarks/bench/reduce/by_key.cu @@ -39,16 +39,17 @@ struct by_key { - template + template float64_t run(thrust::device_vector& input_keys, thrust::device_vector& input_vals, thrust::device_vector& output_keys, - thrust::device_vector& output_vals) + thrust::device_vector& output_vals, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::reduce_by_key(Policy {}, + thrust::reduce_by_key(policy, input_keys.begin(), input_keys.end(), input_vals.begin(), @@ -85,10 +86,13 @@ void run_benchmark(benchmark::State& 
state, output_keys.begin(), thrust::unique(output_keys.begin(), output_keys.end())); thrust::device_vector output_vals(unique_keys); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { float64_t duration = benchmark.template run( - input_keys, input_vals, output_keys, output_vals); + input_keys, input_vals, output_keys, output_vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/exclusive/by_key.cu b/benchmarks/bench/scan/exclusive/by_key.cu index 39da5e85c..68bdeae09 100644 --- a/benchmarks/bench/scan/exclusive/by_key.cu +++ b/benchmarks/bench/scan/exclusive/by_key.cu @@ -38,15 +38,16 @@ struct by_key { - template + template float64_t run(thrust::device_vector& input_keys, thrust::device_vector& input_vals, - thrust::device_vector& output_vals) + thrust::device_vector& output_vals, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::exclusive_scan_by_key(Policy {}, + thrust::exclusive_scan_by_key(policy, input_keys.cbegin(), input_keys.cend(), input_vals.cbegin(), @@ -74,10 +75,13 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output_vals(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration - = benchmark.template run(input_keys, input_vals, output_vals); + float64_t duration = benchmark.template run( + input_keys, input_vals, output_vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/exclusive/max.cu b/benchmarks/bench/scan/exclusive/max.cu index 8f4711da9..888d4a6f5 100644 --- a/benchmarks/bench/scan/exclusive/max.cu +++ b/benchmarks/bench/scan/exclusive/max.cu @@ -24,6 +24,7 @@ #include "../../../bench_utils/bench_utils.hpp" // rocThrust +#include #include #include #include @@ -38,14 
+39,14 @@ struct _max { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); thrust::exclusive_scan( - Policy {}, input.cbegin(), input.cend(), output.begin(), T {}, bench_utils::max_t {}); + policy, input.cbegin(), input.cend(), output.begin(), T {}, bench_utils::max_t {}); d_timer.stop(0); return d_timer.get_duration(); @@ -67,9 +68,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/exclusive/sum.cu b/benchmarks/bench/scan/exclusive/sum.cu index b44a1444e..a189a5923 100644 --- a/benchmarks/bench/scan/exclusive/sum.cu +++ b/benchmarks/bench/scan/exclusive/sum.cu @@ -38,13 +38,13 @@ struct sum { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::exclusive_scan(Policy {}, input.cbegin(), input.cend(), output.begin()); + thrust::exclusive_scan(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -66,9 +66,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); 
+ float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/inclusive/by_key.cu b/benchmarks/bench/scan/inclusive/by_key.cu index 36453f6ed..ee6006813 100644 --- a/benchmarks/bench/scan/inclusive/by_key.cu +++ b/benchmarks/bench/scan/inclusive/by_key.cu @@ -38,15 +38,16 @@ struct by_key { - template + template float64_t run(thrust::device_vector& input_keys, thrust::device_vector& input_vals, - thrust::device_vector& output_vals) + thrust::device_vector& output_vals, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::inclusive_scan_by_key(Policy {}, + thrust::inclusive_scan_by_key(policy, input_keys.cbegin(), input_keys.cend(), input_vals.cbegin(), @@ -74,10 +75,13 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output_vals(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration - = benchmark.template run(input_keys, input_vals, output_vals); + float64_t duration = benchmark.template run( + input_keys, input_vals, output_vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/inclusive/max.cu b/benchmarks/bench/scan/inclusive/max.cu index bed0085ff..99b91163e 100644 --- a/benchmarks/bench/scan/inclusive/max.cu +++ b/benchmarks/bench/scan/inclusive/max.cu @@ -38,14 +38,14 @@ struct _max { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); thrust::inclusive_scan( - Policy {}, input.cbegin(), input.cend(), output.begin(), bench_utils::max_t {}); + policy, input.cbegin(), input.cend(), output.begin(), bench_utils::max_t 
{}); d_timer.stop(0); return d_timer.get_duration(); @@ -68,9 +68,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/scan/inclusive/sum.cu b/benchmarks/bench/scan/inclusive/sum.cu index ed9c0bc3f..42100e67f 100644 --- a/benchmarks/bench/scan/inclusive/sum.cu +++ b/benchmarks/bench/scan/inclusive/sum.cu @@ -39,12 +39,12 @@ struct sum { template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::inclusive_scan(Policy {}, input.cbegin(), input.cend(), output.begin()); + thrust::inclusive_scan(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -66,9 +66,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Output thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/set_operations/base.hpp b/benchmarks/bench/set_operations/base.hpp index bfd0bcd0c..171598bb3 100644 --- a/benchmarks/bench/set_operations/base.hpp +++ b/benchmarks/bench/set_operations/base.hpp @@ -42,16 +42,17 @@ struct basic { - template + template float64_t run(thrust::device_vector& input, 
thrust::device_vector& output, const std::size_t elements_in_A, - const OpT op) + const OpT op, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - op(Policy {}, + op(policy, input.cbegin(), input.cbegin() + elements_in_A, input.cbegin() + elements_in_A, @@ -96,9 +97,13 @@ void run_benchmark(benchmark::State& state, input.cend(), output.begin())); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, elements_in_A, op); + float64_t duration + = benchmark.template run(input, output, elements_in_A, op, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/set_operations/by_key.hpp b/benchmarks/bench/set_operations/by_key.hpp index e30b3b8bc..e2cc86ad3 100644 --- a/benchmarks/bench/set_operations/by_key.hpp +++ b/benchmarks/bench/set_operations/by_key.hpp @@ -42,21 +42,19 @@ struct by_key { - template + template float64_t run(thrust::device_vector& input_keys, thrust::device_vector& input_vals, thrust::device_vector& output_keys, thrust::device_vector& output_vals, const std::size_t elements_in_A, - const OpT op) + const OpT op, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - op(Policy {}, + op(policy, input_keys.cbegin(), input_keys.cbegin() + elements_in_A, input_keys.cbegin() + elements_in_A, @@ -111,10 +109,13 @@ void run_benchmark(benchmark::State& state, const std::size_t elements_in_AB = thrust::distance(output_keys.begin(), result_ends.first); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { float64_t duration = benchmark.template run( - input_keys, input_vals, output_keys, output_vals, elements_in_A, op); + input_keys, input_vals, output_keys, output_vals, elements_in_A, op, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git 
a/benchmarks/bench/shuffle/basic.cu b/benchmarks/bench/shuffle/basic.cu index adf468441..aa317a684 100644 --- a/benchmarks/bench/shuffle/basic.cu +++ b/benchmarks/bench/shuffle/basic.cu @@ -41,14 +41,14 @@ struct basic { - template - float64_t run(thrust::device_vector& data, const std::string rng_engine) + template + float64_t run(thrust::device_vector& data, const std::string rng_engine, Policy policy) { auto do_engine = [&](auto&& engine_constructor) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::shuffle(Policy {}, data.begin(), data.end(), engine_constructor()); + thrust::shuffle(policy, data.begin(), data.end(), engine_constructor()); d_timer.stop(0); return d_timer.get_duration(); @@ -90,9 +90,12 @@ void run_benchmark(benchmark::State& state, // Generate input thrust::device_vector data(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(data, rng_engine); + float64_t duration = benchmark.template run(data, rng_engine, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/sort/keys.cu b/benchmarks/bench/sort/keys.cu index a293a21bb..5e1c472c1 100644 --- a/benchmarks/bench/sort/keys.cu +++ b/benchmarks/bench/sort/keys.cu @@ -38,15 +38,15 @@ struct keys { - template - float64_t run(thrust::device_vector& input) + template + float64_t run(thrust::device_vector& input, Policy policy) { thrust::device_vector vec = input; bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::sort(Policy {}, input.begin(), input.end()); + thrust::sort(policy, input.begin(), input.end()); d_timer.stop(0); return d_timer.get_duration(); @@ -69,9 +69,12 @@ void run_benchmark(benchmark::State& state, const auto entropy = bench_utils::get_entropy_percentage(entropy_reduction) / 100.0f; thrust::device_vector input = bench_utils::generate(elements, seed_type, entropy); + 
bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input); + float64_t duration = benchmark.template run(input, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/sort/keys_custom.cu b/benchmarks/bench/sort/keys_custom.cu index 16e2922b7..6333a1177 100644 --- a/benchmarks/bench/sort/keys_custom.cu +++ b/benchmarks/bench/sort/keys_custom.cu @@ -38,15 +38,15 @@ struct keys_custom { - template - float64_t run(thrust::device_vector& input) + template + float64_t run(thrust::device_vector& input, Policy policy) { thrust::device_vector vec = input; bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::sort(Policy {}, input.begin(), input.end(), bench_utils::less_t {}); + thrust::sort(policy, input.begin(), input.end(), bench_utils::less_t {}); d_timer.stop(0); return d_timer.get_duration(); @@ -69,9 +69,12 @@ void run_benchmark(benchmark::State& state, const auto entropy = bench_utils::get_entropy_percentage(entropy_reduction) / 100.0f; thrust::device_vector input = bench_utils::generate(elements, seed_type, entropy); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input); + float64_t duration = benchmark.template run(input, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/sort/pairs.cu b/benchmarks/bench/sort/pairs.cu index 6c5a32f4a..25568ac1d 100644 --- a/benchmarks/bench/sort/pairs.cu +++ b/benchmarks/bench/sort/pairs.cu @@ -38,13 +38,14 @@ struct pairs { - template - float64_t run(thrust::device_vector& keys, thrust::device_vector& vals) + template + float64_t + run(thrust::device_vector& keys, thrust::device_vector& vals, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::sort_by_key(Policy {}, keys.begin(), 
keys.end(), vals.begin()); + thrust::sort_by_key(policy, keys.begin(), keys.end(), vals.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -68,9 +69,12 @@ void run_benchmark(benchmark::State& state, thrust::device_vector keys = bench_utils::generate(elements, seed_type, entropy); thrust::device_vector vals = bench_utils::generate(elements, seed_type); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(keys, vals); + float64_t duration = benchmark.template run(keys, vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/sort/pairs_custom.cu b/benchmarks/bench/sort/pairs_custom.cu index 90cff7001..2953df161 100644 --- a/benchmarks/bench/sort/pairs_custom.cu +++ b/benchmarks/bench/sort/pairs_custom.cu @@ -38,14 +38,14 @@ struct pairs_custom { - template - float64_t run(thrust::device_vector& keys, thrust::device_vector& vals) + template + float64_t + run(thrust::device_vector& keys, thrust::device_vector& vals, Policy policy) { - bench_utils::gpu_timer d_timer; + bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::sort_by_key( - Policy {}, keys.begin(), keys.end(), vals.begin(), bench_utils::less_t {}); + thrust::sort_by_key(policy, keys.begin(), keys.end(), vals.begin(), bench_utils::less_t {}); d_timer.stop(0); return d_timer.get_duration(); @@ -69,9 +69,12 @@ void run_benchmark(benchmark::State& state, thrust::device_vector keys = bench_utils::generate(elements, seed_type, entropy); thrust::device_vector vals = bench_utils::generate(elements, seed_type); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(keys, vals); + float64_t duration = benchmark.template run(keys, vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git 
a/benchmarks/bench/tabulate/basic.cu b/benchmarks/bench/tabulate/basic.cu index 99148008b..8354dc76a 100644 --- a/benchmarks/bench/tabulate/basic.cu +++ b/benchmarks/bench/tabulate/basic.cu @@ -50,13 +50,13 @@ struct seg_size_t struct basic { - template - float64_t run(thrust::device_vector& output, seg_size_t op) + template + float64_t run(thrust::device_vector& output, seg_size_t op, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::tabulate(Policy {}, output.begin(), output.end(), op); + thrust::tabulate(policy, output.begin(), output.end(), op); d_timer.stop(0); return d_timer.get_duration(); @@ -80,9 +80,12 @@ void run_benchmark(benchmark::State& state, seg_size_t op {thrust::raw_pointer_cast(input.data())}; + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(output, op); + float64_t duration = benchmark.template run(output, op, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/transform/basic.cu b/benchmarks/bench/transform/basic.cu index 9ce5b7fc9..6c1dfe339 100644 --- a/benchmarks/bench/transform/basic.cu +++ b/benchmarks/bench/transform/basic.cu @@ -25,8 +25,8 @@ // rocThrust #include -#include #include +#include #include // Google Benchmark @@ -70,14 +70,14 @@ struct fib_t struct basic { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); thrust::transform( - Policy {}, input.cbegin(), input.cend(), output.begin(), fib_t {}); + policy, input.cbegin(), input.cend(), output.begin(), fib_t {}); d_timer.stop(0); return d_timer.get_duration(); @@ -101,9 +101,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st T {42} /*magic number used in Thrust*/); 
thrust::device_vector output(elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/transform_reduce/sum.cu b/benchmarks/bench/transform_reduce/sum.cu index 38f266446..7676074a4 100644 --- a/benchmarks/bench/transform_reduce/sum.cu +++ b/benchmarks/bench/transform_reduce/sum.cu @@ -38,14 +38,14 @@ struct sum { - template - float64_t run(thrust::device_vector& input) + template + float64_t run(thrust::device_vector& input, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); bench_utils::do_not_optimize( - thrust::reduce(Policy {}, input.begin(), input.end(), T {}, thrust::plus {})); + thrust::reduce(policy, input.begin(), input.end(), T {}, thrust::plus {})); d_timer.stop(0); return d_timer.get_duration(); @@ -64,9 +64,12 @@ void run_benchmark(benchmark::State& state, const std::size_t elements, const st // Generate input thrust::device_vector input = bench_utils::generate(elements, seed_type); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input); + float64_t duration = benchmark.template run(input, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/unique/basic.cu b/benchmarks/bench/unique/basic.cu index 3d43569ec..53afe5be4 100644 --- a/benchmarks/bench/unique/basic.cu +++ b/benchmarks/bench/unique/basic.cu @@ -39,13 +39,13 @@ struct basic { - template - float64_t run(thrust::device_vector& input, thrust::device_vector& output) + template + float64_t run(thrust::device_vector& input, thrust::device_vector& output, Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - 
thrust::unique_copy(Policy {}, input.cbegin(), input.cend(), output.begin()); + thrust::unique_copy(policy, input.cbegin(), input.cend(), output.begin()); d_timer.stop(0); return d_timer.get_duration(); @@ -77,9 +77,12 @@ void run_benchmark(benchmark::State& state, thrust::unique_copy( thrust::detail::device_t {}, input.cbegin(), input.cend(), output.begin())); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output); + float64_t duration = benchmark.template run(input, output, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/unique/by_key.cu b/benchmarks/bench/unique/by_key.cu index 9817a960d..e8b3a79b1 100644 --- a/benchmarks/bench/unique/by_key.cu +++ b/benchmarks/bench/unique/by_key.cu @@ -39,16 +39,17 @@ struct by_key { - template + template float64_t run(thrust::device_vector& input_keys, thrust::device_vector& input_vals, thrust::device_vector& output_keys, - thrust::device_vector& output_vals) + thrust::device_vector& output_vals, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::unique_by_key_copy(Policy {}, + thrust::unique_by_key_copy(policy, input_keys.cbegin(), input_keys.cend(), input_vals.cbegin(), @@ -85,10 +86,13 @@ void run_benchmark(benchmark::State& state, thrust::unique_copy(input_keys.cbegin(), input_keys.cend(), output_keys.begin())); thrust::device_vector output_vals(unique_elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { float64_t duration = benchmark.template run( - input_keys, input_vals, output_keys, output_vals); + input_keys, input_vals, output_keys, output_vals, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/vectorized_search/basic.cu b/benchmarks/bench/vectorized_search/basic.cu index 
66b37b7f9..6a7b966d0 100644 --- a/benchmarks/bench/vectorized_search/basic.cu +++ b/benchmarks/bench/vectorized_search/basic.cu @@ -42,12 +42,13 @@ struct basic template float64_t run(thrust::device_vector& input, thrust::device_vector& output, - const std::size_t elements) + const std::size_t elements, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::lower_bound(Policy {}, + thrust::lower_bound(policy, input.begin(), input.begin() + elements, input.begin() + elements, @@ -79,9 +80,12 @@ void run_benchmark(benchmark::State& state, thrust::device_vector output(needles); thrust::sort(input.begin(), input.begin() + elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, elements); + float64_t duration = benchmark.template run(input, output, elements, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/vectorized_search/lower_bound.cu b/benchmarks/bench/vectorized_search/lower_bound.cu index 6b1253f9e..aab8d8463 100644 --- a/benchmarks/bench/vectorized_search/lower_bound.cu +++ b/benchmarks/bench/vectorized_search/lower_bound.cu @@ -39,15 +39,16 @@ struct lower_bound { - template + template float64_t run(thrust::device_vector& input, thrust::device_vector& output, - const std::size_t elements) + const std::size_t elements, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::lower_bound(Policy {}, + thrust::lower_bound(policy, input.begin(), input.begin() + elements, input.begin() + elements, @@ -79,9 +80,12 @@ void run_benchmark(benchmark::State& state, thrust::device_vector output(needles); thrust::sort(input.begin(), input.begin() + elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, elements); + float64_t 
duration = benchmark.template run(input, output, elements, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench/vectorized_search/upper_bound.cu b/benchmarks/bench/vectorized_search/upper_bound.cu index df7fd5c17..b0b842e8f 100644 --- a/benchmarks/bench/vectorized_search/upper_bound.cu +++ b/benchmarks/bench/vectorized_search/upper_bound.cu @@ -39,15 +39,16 @@ struct upper_bound { - template + template float64_t run(thrust::device_vector& input, thrust::device_vector& output, - const std::size_t elements) + const std::size_t elements, + Policy policy) { bench_utils::gpu_timer d_timer; d_timer.start(0); - thrust::upper_bound(Policy {}, + thrust::upper_bound(policy, input.begin(), input.begin() + elements, input.begin() + elements, @@ -79,9 +80,12 @@ void run_benchmark(benchmark::State& state, thrust::device_vector output(needles); thrust::sort(input.begin(), input.begin() + elements); + bench_utils::caching_allocator_t alloc {}; + thrust::detail::device_t policy {}; + for(auto _ : state) { - float64_t duration = benchmark.template run(input, output, elements); + float64_t duration = benchmark.template run(input, output, elements, policy(alloc)); state.SetIterationTime(duration); gpu_times.push_back(duration); } diff --git a/benchmarks/bench_utils/bench_utils.hpp b/benchmarks/bench_utils/bench_utils.hpp index 3db22ccc0..cb32e2ead 100644 --- a/benchmarks/bench_utils/bench_utils.hpp +++ b/benchmarks/bench_utils/bench_utils.hpp @@ -29,6 +29,9 @@ #include "custom_reporter.hpp" #include "generation_utils.hpp" +#include +#include + // HIP/CUDA #if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_HIP #include @@ -634,6 +637,68 @@ inline const char* get_seed_message() "or 'random' for different inputs for each repetition"; } +struct caching_allocator_t +{ + using value_type = char; + + caching_allocator_t() = default; + ~caching_allocator_t() + { + free_all(); + } + + char* allocate(std::ptrdiff_t num_bytes) + { + 
value_type* result {}; + auto free_block = free_blocks.find(num_bytes); + if(free_block != free_blocks.end()) + { + result = free_block->second; + free_blocks.erase(free_block); + } + else + { + HIP_CHECK(hipMalloc(&result, num_bytes)); + } + + allocated_blocks.emplace(result, num_bytes); + return result; + } + + void deallocate(value_type* ptr, size_t) + { + auto iter = allocated_blocks.find(ptr); + if(iter == allocated_blocks.end()) + { + throw std::runtime_error("Memory was not allocated by this allocator"); + } + + std::ptrdiff_t num_bytes = iter->second; + allocated_blocks.erase(iter); + free_blocks.emplace(num_bytes, ptr); + } + +private: + using FreeBlocksType = std::multimap; + using AllocatedBlocksType = std::map; + + FreeBlocksType free_blocks; + AllocatedBlocksType allocated_blocks; + + void free_all() + { + for(auto free_block : free_blocks) + { + HIP_CHECK(hipFree(free_block.second)); + } + + for(auto allocated_block : allocated_blocks) + { + HIP_CHECK(hipFree(allocated_block.first)); + } + } +}; + } // namespace bench_utils #endif // ROCTHRUST_BENCHMARKS_BENCH_UTILS_BENCH_UTILS_HPP_ From aa64ae703da74fdd2552158395b6e6348002b7a9 Mon Sep 17 00:00:00 2001 From: Nick Breed Date: Thu, 8 Aug 2024 08:52:13 +0000 Subject: [PATCH 28/44] Changes for review --- benchmarks/bench_utils/bench_utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/bench_utils/bench_utils.hpp b/benchmarks/bench_utils/bench_utils.hpp index cb32e2ead..94b11258a 100644 --- a/benchmarks/bench_utils/bench_utils.hpp +++ b/benchmarks/bench_utils/bench_utils.hpp @@ -29,7 +29,6 @@ #include "custom_reporter.hpp" #include "generation_utils.hpp" -#include #include // HIP/CUDA @@ -54,6 +53,7 @@ #include #include +#include #include namespace bench_utils From 0673125e3b4902ab653ebeac7be7df99bf8c4282 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Fri, 26 Jul 2024 13:31:28 +0200 Subject: [PATCH 29/44] ci: set up sccache --- .gitlab-ci.yml | 25 
++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e971bff8c..b83ca3b65 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,6 +12,7 @@ include: - /deps-rocm.yaml - /deps-windows.yaml - /deps-nvcc.yaml + - /deps-compiler-acceleration.yaml - /gpus-rocm.yaml - /gpus-nvcc.yaml - /rules.yaml @@ -46,17 +47,21 @@ copyright-date: extends: - .deps:rocm - .deps:cmake-latest + - .deps:compiler-acceleration before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-latest", before_script] + - !reference [".deps:compiler-acceleration", before_script] .cmake-minimum: extends: - .deps:rocm - .deps:cmake-minimum + - .deps:compiler-acceleration before_script: - !reference [".deps:rocm", before_script] - !reference [".deps:cmake-minimum", before_script] + - !reference [".deps:compiler-acceleration", before_script] .install-rocprim: script: @@ -71,6 +76,8 @@ copyright-date: -D BUILD_TEST=OFF -D BUILD_EXAMPLE=OFF -D ROCM_DEP_ROCMCORE=OFF + -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c + -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx -S $ROCPRIM_DIR -B $ROCPRIM_DIR/build - cd $ROCPRIM_DIR/build @@ -91,7 +98,7 @@ copyright-date: - !reference [.install-rocprim, script] - | # Setup env vars for testing rng_seed_count=0; prng_seeds="0"; - if [[ $CI_COMMIT_BRANCH == "develop_stream" ]]; then + if [[ $CI_COMMIT_BRANCH == "develop_stream" ]]; then rng_seed_count=3 prng_seeds="0, 1000" fi @@ -111,6 +118,9 @@ copyright-date: -D AMDGPU_TEST_TARGETS=$GPU_TARGETS -D RNG_SEED_COUNT=$rng_seed_count -D PRNG_SEEDS=$prng_seeds + -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c + -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx + -D CMAKE_CUDA_COMPILER_LAUNCHER=phc_sccache_cuda -S $CI_PROJECT_DIR -B $CI_PROJECT_DIR/build - cmake --build $CI_PROJECT_DIR/build @@ -198,10 +208,10 @@ build:windows: -D CMAKE_INSTALL_PREFIX:PATH="$ROCPRIM_DIR/build/install" *>&1 - \& cmake --build "$ROCPRIM_DIR/build" --target 
install *>&1 # Configure and build rocThrust - - \& cmake - -S "$CI_PROJECT_DIR" - -B "$CI_PROJECT_DIR/build" - -G Ninja + - \& cmake + -S "$CI_PROJECT_DIR" + -B "$CI_PROJECT_DIR/build" + -G Ninja -D CMAKE_BUILD_TYPE=Release -D GPU_TARGETS=$GPU_TARGET -D BUILD_TEST=ON @@ -327,10 +337,12 @@ test:rocm-windows-install: - .deps:nvcc - .gpus:nvcc-gpus - .deps:cmake-latest + - .deps:compiler-acceleration - .rules:manual before_script: - !reference [".deps:nvcc", before_script] - !reference [".deps:cmake-latest", before_script] + - !reference [".deps:compiler-acceleration", before_script] build:cuda-and-omp: stage: build @@ -361,6 +373,9 @@ build:cuda-and-omp: -D THRUST_ENABLE_MULTICONFIG=ON -D THRUST_MULTICONFIG_ENABLE_SYSTEM_OMP=ON -D THRUST_MULTICONFIG_ENABLE_SYSTEM_CUDA=ON + -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c + -D CMAKE_CXX_COMPILER_LAUNCHER=phc_sccache_cxx + -D CMAKE_CUDA_COMPILER_LAUNCHER=phc_sccache_cuda -B $CI_PROJECT_DIR/build -S $CCCL_DIR/thrust - cmake --build $CI_PROJECT_DIR/build From 75c44cf6f9db9ff4a8666d700a97f4997c3d2599 Mon Sep 17 00:00:00 2001 From: Nick Breed Date: Thu, 8 Aug 2024 09:19:00 +0000 Subject: [PATCH 30/44] Added helper functions for choosing between different custom reporter --- benchmarks/bench/adjacent_difference/basic.cu | 2 +- .../bench/adjacent_difference/custom.cu | 2 +- .../bench/adjacent_difference/in_place.cu | 2 +- benchmarks/bench/copy/basic.cu | 2 +- benchmarks/bench/copy/if.cu | 2 +- benchmarks/bench/fill/basic.cu | 2 +- benchmarks/bench/for_each/basic.cu | 2 +- benchmarks/bench/inner_product/basic.cu | 2 +- benchmarks/bench/merge/basic.cu | 2 +- benchmarks/bench/partition/basic.cu | 2 +- benchmarks/bench/reduce/basic.cu | 2 +- benchmarks/bench/reduce/by_key.cu | 2 +- benchmarks/bench/scan/exclusive/by_key.cu | 2 +- benchmarks/bench/scan/exclusive/max.cu | 2 +- benchmarks/bench/scan/exclusive/sum.cu | 2 +- benchmarks/bench/scan/inclusive/by_key.cu | 2 +- benchmarks/bench/scan/inclusive/max.cu | 2 +- 
benchmarks/bench/scan/inclusive/sum.cu | 2 +- benchmarks/bench/set_operations/difference.cu | 2 +- .../bench/set_operations/difference_by_key.cu | 2 +- .../bench/set_operations/intersection.cu | 2 +- .../set_operations/intersection_by_key.cu | 2 +- .../set_operations/symmetric_difference.cu | 2 +- .../symmetric_difference_by_key.cu | 2 +- benchmarks/bench/set_operations/union.cu | 2 +- .../bench/set_operations/union_by_key.cu | 2 +- benchmarks/bench/shuffle/basic.cu | 2 +- benchmarks/bench/sort/keys.cu | 2 +- benchmarks/bench/sort/keys_custom.cu | 2 +- benchmarks/bench/sort/pairs.cu | 2 +- benchmarks/bench/sort/pairs_custom.cu | 2 +- benchmarks/bench/tabulate/basic.cu | 2 +- benchmarks/bench/transform/basic.cu | 2 +- benchmarks/bench/transform_reduce/sum.cu | 2 +- benchmarks/bench/unique/basic.cu | 2 +- benchmarks/bench/unique/by_key.cu | 2 +- benchmarks/bench/vectorized_search/basic.cu | 2 +- .../bench/vectorized_search/lower_bound.cu | 2 +- .../bench/vectorized_search/upper_bound.cu | 2 +- benchmarks/bench_utils/custom_reporter.hpp | 39 ++++++++++++++++++- 40 files changed, 77 insertions(+), 40 deletions(-) diff --git a/benchmarks/bench/adjacent_difference/basic.cu b/benchmarks/bench/adjacent_difference/basic.cu index 189ec34b2..6733c563c 100644 --- a/benchmarks/bench/adjacent_difference/basic.cu +++ b/benchmarks/bench/adjacent_difference/basic.cu @@ -144,7 +144,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/adjacent_difference/custom.cu b/benchmarks/bench/adjacent_difference/custom.cu index b5c41003c..24d6c031e 100644 --- a/benchmarks/bench/adjacent_difference/custom.cu +++ b/benchmarks/bench/adjacent_difference/custom.cu @@ -164,7 +164,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new 
bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/adjacent_difference/in_place.cu b/benchmarks/bench/adjacent_difference/in_place.cu index 75c8fd44f..154ce0062 100644 --- a/benchmarks/bench/adjacent_difference/in_place.cu +++ b/benchmarks/bench/adjacent_difference/in_place.cu @@ -141,7 +141,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/copy/basic.cu b/benchmarks/bench/copy/basic.cu index bf278adda..52f22c5fc 100644 --- a/benchmarks/bench/copy/basic.cu +++ b/benchmarks/bench/copy/basic.cu @@ -146,7 +146,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/copy/if.cu b/benchmarks/bench/copy/if.cu index 9436cf07f..cfbe7eb91 100644 --- a/benchmarks/bench/copy/if.cu +++ b/benchmarks/bench/copy/if.cu @@ -175,7 +175,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/fill/basic.cu b/benchmarks/bench/fill/basic.cu index 8568431ca..f88b0fc5b 100644 --- a/benchmarks/bench/fill/basic.cu +++ b/benchmarks/bench/fill/basic.cu @@ -143,7 +143,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/for_each/basic.cu 
b/benchmarks/bench/for_each/basic.cu index 9fc6afa84..086d2a72e 100644 --- a/benchmarks/bench/for_each/basic.cu +++ b/benchmarks/bench/for_each/basic.cu @@ -152,7 +152,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/inner_product/basic.cu b/benchmarks/bench/inner_product/basic.cu index cac9a4bee..67a9b62ef 100644 --- a/benchmarks/bench/inner_product/basic.cu +++ b/benchmarks/bench/inner_product/basic.cu @@ -149,7 +149,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/merge/basic.cu b/benchmarks/bench/merge/basic.cu index d128cc610..f16a7baa0 100644 --- a/benchmarks/bench/merge/basic.cu +++ b/benchmarks/bench/merge/basic.cu @@ -178,7 +178,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/partition/basic.cu b/benchmarks/bench/partition/basic.cu index 91eaf7332..dfa5de12e 100644 --- a/benchmarks/bench/partition/basic.cu +++ b/benchmarks/bench/partition/basic.cu @@ -181,7 +181,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/reduce/basic.cu b/benchmarks/bench/reduce/basic.cu index e57d438d9..43de36f44 100644 --- a/benchmarks/bench/reduce/basic.cu +++ b/benchmarks/bench/reduce/basic.cu @@ -141,7 +141,7 @@ int main(int argc, 
char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/reduce/by_key.cu b/benchmarks/bench/reduce/by_key.cu index acaf1e7e4..86a9794ba 100644 --- a/benchmarks/bench/reduce/by_key.cu +++ b/benchmarks/bench/reduce/by_key.cu @@ -194,7 +194,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/exclusive/by_key.cu b/benchmarks/bench/scan/exclusive/by_key.cu index 68bdeae09..5c212a074 100644 --- a/benchmarks/bench/scan/exclusive/by_key.cu +++ b/benchmarks/bench/scan/exclusive/by_key.cu @@ -173,7 +173,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/exclusive/max.cu b/benchmarks/bench/scan/exclusive/max.cu index 888d4a6f5..74c5f6b4e 100644 --- a/benchmarks/bench/scan/exclusive/max.cu +++ b/benchmarks/bench/scan/exclusive/max.cu @@ -151,7 +151,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/exclusive/sum.cu b/benchmarks/bench/scan/exclusive/sum.cu index a189a5923..0a9ccbdd7 100644 --- a/benchmarks/bench/scan/exclusive/sum.cu +++ b/benchmarks/bench/scan/exclusive/sum.cu @@ -149,7 +149,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + 
benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/inclusive/by_key.cu b/benchmarks/bench/scan/inclusive/by_key.cu index ee6006813..5e9848d35 100644 --- a/benchmarks/bench/scan/inclusive/by_key.cu +++ b/benchmarks/bench/scan/inclusive/by_key.cu @@ -173,7 +173,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/inclusive/max.cu b/benchmarks/bench/scan/inclusive/max.cu index 99b91163e..e131b5bbb 100644 --- a/benchmarks/bench/scan/inclusive/max.cu +++ b/benchmarks/bench/scan/inclusive/max.cu @@ -151,7 +151,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/scan/inclusive/sum.cu b/benchmarks/bench/scan/inclusive/sum.cu index 42100e67f..0d7957991 100644 --- a/benchmarks/bench/scan/inclusive/sum.cu +++ b/benchmarks/bench/scan/inclusive/sum.cu @@ -149,7 +149,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/difference.cu b/benchmarks/bench/set_operations/difference.cu index bc7191a3f..20db948b6 100644 --- a/benchmarks/bench/set_operations/difference.cu +++ b/benchmarks/bench/set_operations/difference.cu @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish 
benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/difference_by_key.cu b/benchmarks/bench/set_operations/difference_by_key.cu index 8ae2e5a92..beabc67c3 100644 --- a/benchmarks/bench/set_operations/difference_by_key.cu +++ b/benchmarks/bench/set_operations/difference_by_key.cu @@ -93,7 +93,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/intersection.cu b/benchmarks/bench/set_operations/intersection.cu index 36a4ef77c..65f1fbc24 100644 --- a/benchmarks/bench/set_operations/intersection.cu +++ b/benchmarks/bench/set_operations/intersection.cu @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/intersection_by_key.cu b/benchmarks/bench/set_operations/intersection_by_key.cu index 637f61b2b..77ddd5f22 100644 --- a/benchmarks/bench/set_operations/intersection_by_key.cu +++ b/benchmarks/bench/set_operations/intersection_by_key.cu @@ -92,7 +92,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/symmetric_difference.cu b/benchmarks/bench/set_operations/symmetric_difference.cu index 7bbe5d9bb..b6365bbf2 100644 --- a/benchmarks/bench/set_operations/symmetric_difference.cu +++ b/benchmarks/bench/set_operations/symmetric_difference.cu @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); 
+ benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/symmetric_difference_by_key.cu b/benchmarks/bench/set_operations/symmetric_difference_by_key.cu index 529118475..6a3aeca55 100644 --- a/benchmarks/bench/set_operations/symmetric_difference_by_key.cu +++ b/benchmarks/bench/set_operations/symmetric_difference_by_key.cu @@ -93,7 +93,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/union.cu b/benchmarks/bench/set_operations/union.cu index 7ac93f9d2..e1832a715 100644 --- a/benchmarks/bench/set_operations/union.cu +++ b/benchmarks/bench/set_operations/union.cu @@ -74,7 +74,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/set_operations/union_by_key.cu b/benchmarks/bench/set_operations/union_by_key.cu index 1ca05e83b..af9d7e92f 100644 --- a/benchmarks/bench/set_operations/union_by_key.cu +++ b/benchmarks/bench/set_operations/union_by_key.cu @@ -93,7 +93,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/shuffle/basic.cu b/benchmarks/bench/shuffle/basic.cu index aa317a684..82202fde7 100644 --- a/benchmarks/bench/shuffle/basic.cu +++ b/benchmarks/bench/shuffle/basic.cu @@ -177,7 +177,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + 
benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/sort/keys.cu b/benchmarks/bench/sort/keys.cu index 5e1c472c1..186ab723c 100644 --- a/benchmarks/bench/sort/keys.cu +++ b/benchmarks/bench/sort/keys.cu @@ -159,7 +159,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/sort/keys_custom.cu b/benchmarks/bench/sort/keys_custom.cu index 6333a1177..712dfede8 100644 --- a/benchmarks/bench/sort/keys_custom.cu +++ b/benchmarks/bench/sort/keys_custom.cu @@ -159,7 +159,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/sort/pairs.cu b/benchmarks/bench/sort/pairs.cu index 25568ac1d..61f8bf922 100644 --- a/benchmarks/bench/sort/pairs.cu +++ b/benchmarks/bench/sort/pairs.cu @@ -161,7 +161,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/sort/pairs_custom.cu b/benchmarks/bench/sort/pairs_custom.cu index 2953df161..0ba785b06 100644 --- a/benchmarks/bench/sort/pairs_custom.cu +++ b/benchmarks/bench/sort/pairs_custom.cu @@ -161,7 +161,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/tabulate/basic.cu b/benchmarks/bench/tabulate/basic.cu index 
8354dc76a..2c1fe5ef1 100644 --- a/benchmarks/bench/tabulate/basic.cu +++ b/benchmarks/bench/tabulate/basic.cu @@ -154,7 +154,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/transform/basic.cu b/benchmarks/bench/transform/basic.cu index 6c1dfe339..a47b0ab28 100644 --- a/benchmarks/bench/transform/basic.cu +++ b/benchmarks/bench/transform/basic.cu @@ -175,7 +175,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/transform_reduce/sum.cu b/benchmarks/bench/transform_reduce/sum.cu index 7676074a4..6dc60ec67 100644 --- a/benchmarks/bench/transform_reduce/sum.cu +++ b/benchmarks/bench/transform_reduce/sum.cu @@ -148,7 +148,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/unique/basic.cu b/benchmarks/bench/unique/basic.cu index 53afe5be4..6097f6d4d 100644 --- a/benchmarks/bench/unique/basic.cu +++ b/benchmarks/bench/unique/basic.cu @@ -166,7 +166,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/unique/by_key.cu b/benchmarks/bench/unique/by_key.cu index e8b3a79b1..e54dc291b 100644 --- a/benchmarks/bench/unique/by_key.cu +++ b/benchmarks/bench/unique/by_key.cu @@ -191,7 +191,7 @@ int main(int argc, char* argv[]) } // Run 
benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/vectorized_search/basic.cu b/benchmarks/bench/vectorized_search/basic.cu index 6a7b966d0..d69ecc059 100644 --- a/benchmarks/bench/vectorized_search/basic.cu +++ b/benchmarks/bench/vectorized_search/basic.cu @@ -158,7 +158,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/vectorized_search/lower_bound.cu b/benchmarks/bench/vectorized_search/lower_bound.cu index aab8d8463..4784161e6 100644 --- a/benchmarks/bench/vectorized_search/lower_bound.cu +++ b/benchmarks/bench/vectorized_search/lower_bound.cu @@ -158,7 +158,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench/vectorized_search/upper_bound.cu b/benchmarks/bench/vectorized_search/upper_bound.cu index b0b842e8f..90157035d 100644 --- a/benchmarks/bench/vectorized_search/upper_bound.cu +++ b/benchmarks/bench/vectorized_search/upper_bound.cu @@ -158,7 +158,7 @@ int main(int argc, char* argv[]) } // Run benchmarks - benchmark::RunSpecifiedBenchmarks(new bench_utils::CustomReporter); + benchmark::RunSpecifiedBenchmarks(bench_utils::ChooseCustomReporter()); // Finish benchmark::Shutdown(); diff --git a/benchmarks/bench_utils/custom_reporter.hpp b/benchmarks/bench_utils/custom_reporter.hpp index e130366a9..3d7d55b06 100644 --- a/benchmarks/bench_utils/custom_reporter.hpp +++ b/benchmarks/bench_utils/custom_reporter.hpp @@ -44,6 +44,13 @@ namespace bench_utils { + +template +bool 
IsType(const SrcType* src) +{ + return dynamic_cast(src) != nullptr; +} + /// \brief Custom Google Benchmark reporter for formatting the benchmarks' report matching Thrust's. /// /// This reporter is a ConsoleReporter that outputs: @@ -60,7 +67,7 @@ namespace bench_utils /// repeated \p repetitions times to measure the stability of results. In this case, the mean, /// median, standard deviation (stddev) and coefficient of variation (cv) of the above-described /// metrics are also reported after all the \p repetitions have ben run. -class CustomReporter : public benchmark::ConsoleReporter +class CustomConsoleReporter : public benchmark::ConsoleReporter { private: enum LogColor @@ -409,5 +416,35 @@ class CustomReporter : public benchmark::ConsoleReporter } } }; + +using CustomJSONReporter = benchmark::JSONReporter; + +BENCHMARK_DISABLE_DEPRECATED_WARNING + +using CustomCSVReporter = benchmark::CSVReporter; + +benchmark::BenchmarkReporter* ChooseCustomReporter() +{ + typedef benchmark::BenchmarkReporter* PtrType; + PtrType default_display_reporter = benchmark::CreateDefaultDisplayReporter(); + + if (IsType(default_display_reporter)) + { + return PtrType(new CustomCSVReporter); + } + else if (IsType(default_display_reporter)) + { + return PtrType(new CustomJSONReporter); + } + else if (IsType(default_display_reporter)) + { + return PtrType(new CustomConsoleReporter); + } + + return nullptr; +} + +BENCHMARK_RESTORE_DEPRECATED_WARNING + } // namespace bench_utils #endif // ROCTHRUST_BENCHMARKS_BENCH_UTILS_CUSTOM_REPORTER_HPP_ From a36adac320c71b39724173237e274f0d5c58a2fa Mon Sep 17 00:00:00 2001 From: Nick Breed Date: Tue, 13 Aug 2024 12:24:27 +0000 Subject: [PATCH 31/44] Added json and csv custom reporter for benchmarks --- benchmarks/bench_utils/custom_reporter.hpp | 555 ++++++++++++++++++--- 1 file changed, 481 insertions(+), 74 deletions(-) diff --git a/benchmarks/bench_utils/custom_reporter.hpp b/benchmarks/bench_utils/custom_reporter.hpp index 3d7d55b06..7bd008c98 
100644 --- a/benchmarks/bench_utils/custom_reporter.hpp +++ b/benchmarks/bench_utils/custom_reporter.hpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -48,9 +49,88 @@ namespace bench_utils template bool IsType(const SrcType* src) { + // Check if the src can be cast to the DstType return dynamic_cast(src) != nullptr; } +static std::string FormatString(const char* msg, va_list args) +{ + // we might need a second shot at this, so pre-emptively make a copy + va_list args_cp; + va_copy(args_cp, args); + + std::size_t size = 256; + char local_buff[256]; + auto ret = vsnprintf(local_buff, size, msg, args_cp); + + if(ret <= 0) + { + return {}; + } + else if(ret < 0) + if(static_cast(ret) < size) + { + return local_buff; + } + // we did not provide a long enough buffer on our first attempt. + size = static_cast(ret) + 1; // + 1 for the null byte + std::unique_ptr buff(new char[size]); + ret = vsnprintf(buff.get(), size, msg, args); + return buff.get(); +} + +static std::string FormatString(const char* msg, ...) 
+{ + va_list args; + va_start(args, msg); + auto tmp = FormatString(msg, args); + va_end(args); + return tmp; +} + +std::string get_complexity(const benchmark::BigO& complexity) +{ + switch(complexity) + { + case benchmark::oN: + return "N"; + case benchmark::oNSquared: + return "N^2"; + case benchmark::oNCubed: + return "N^3"; + case benchmark::oLogN: + return "lgN"; + case benchmark::oNLogN: + return "NlgN"; + case benchmark::o1: + return "(1)"; + default: + return "f(N)"; + } +} + +template +double calculate_bw_utils(const Run& result) +{ + // Calculates bandwidth utilization in % + std::map* global_context + = benchmark::internal::GetGlobalContext(); + if(global_context != nullptr) + { + for(const auto& keyval : *global_context) + { + if(keyval.first == "hdp_peak_global_mem_bus_bandwidth") + { + const double global_mem_bw = result.counters.at("bytes_per_second").value; + const double peak_global_bw = std::stod(keyval.second); + const double bw_util = 100. * global_mem_bw / peak_global_bw; + return bw_util; + } + } + } + return -1; +} + /// \brief Custom Google Benchmark reporter for formatting the benchmarks' report matching Thrust's. 
/// /// This reporter is a ConsoleReporter that outputs: @@ -103,62 +183,6 @@ class CustomConsoleReporter : public benchmark::ConsoleReporter } } - std::string get_complexity(const benchmark::BigO& complexity) - { - switch(complexity) - { - case benchmark::oN: - return "N"; - case benchmark::oNSquared: - return "N^2"; - case benchmark::oNCubed: - return "N^3"; - case benchmark::oLogN: - return "lgN"; - case benchmark::oNLogN: - return "NlgN"; - case benchmark::o1: - return "(1)"; - default: - return "f(N)"; - } - } - - static std::string FormatString(const char* msg, va_list args) - { - // we might need a second shot at this, so pre-emptivly make a copy - va_list args_cp; - va_copy(args_cp, args); - - std::size_t size = 256; - char local_buff[256]; - auto ret = vsnprintf(local_buff, size, msg, args_cp); - - if(ret <= 0) - { - return {}; - } - else if(ret < 0) - if(static_cast(ret) < size) - { - return local_buff; - } - // we did not provide a long enough buffer on our first attempt. - size = static_cast(ret) + 1; // + 1 for the null byte - std::unique_ptr buff(new char[size]); - ret = vsnprintf(buff.get(), size, msg, args); - return buff.get(); - } - - static std::string FormatString(const char* msg, ...) - { - va_list args; - va_start(args, msg); - auto tmp = FormatString(msg, args); - va_end(args); - return tmp; - } - void PrintColoredString(std::ostream& os, std::string color, std::string str, ...) 
{ os << color; @@ -326,23 +350,14 @@ class CustomConsoleReporter : public benchmark::ConsoleReporter unit); // Print BW util - std::map* global_context - = benchmark::internal::GetGlobalContext(); - if(global_context != nullptr) + s = FormatString("%.2f", c.second.value); + const double bw_util = calculate_bw_utils(result); + if (bw_util >= 0) { - s = FormatString("%.2f", c.second.value); - for(const auto& keyval : *global_context) - { - if(keyval.first == "hdp_peak_global_mem_bus_bandwidth") - { - const double global_mem_bw = std::stod(s); - const double peak_global_bw = std::stod(keyval.second); - s = FormatString("%.2f", 100. * global_mem_bw / peak_global_bw); - unit = "%"; - cNameLen = std::max(std::string::size_type(12), s.length()); - } - } - } + s = FormatString("%.2f", bw_util); + unit = "%"; + cNameLen = std::max(std::string::size_type(12), s.length()); + } } else if(c.first == "gpu_noise") { @@ -417,11 +432,403 @@ class CustomConsoleReporter : public benchmark::ConsoleReporter } }; -using CustomJSONReporter = benchmark::JSONReporter; +class CustomJSONReporter : public benchmark::JSONReporter +{ + private: + bool first_report_ = true; + + std::string StrEscape(const std::string& s) + { + std::string tmp; + tmp.reserve(s.size()); + for (char c : s) + { + switch (c) { + case '\b': + tmp += "\\b"; + break; + case '\f': + tmp += "\\f"; + break; + case '\n': + tmp += "\\n"; + break; + case '\r': + tmp += "\\r"; + break; + case '\t': + tmp += "\\t"; + break; + case '\\': + tmp += "\\\\"; + break; + case '"': + tmp += "\\\""; + break; + default: + tmp += c; + break; + } + } + return tmp; + } + + std::string FormatKV(std::string const& key, std::string const& value) + { + return FormatString("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); + } + + std::string FormatKV(std::string const& key, const char* value) + { + return FormatString("\"%s\": \"%s\"", StrEscape(key).c_str(), + StrEscape(value).c_str()); + } + + std::string 
FormatKV(std::string const& key, bool value) + { + return FormatString("\"%s\": %s", StrEscape(key).c_str(), + value ? "true" : "false"); + } + + std::string FormatKV(std::string const& key, int64_t value) + { + std::stringstream ss; + ss << '"' << StrEscape(key) << "\": " << value; + return ss.str(); + } + + std::string FormatKV(std::string const& key, double value) + { + std::stringstream ss; + ss << '"' << StrEscape(key) << "\": "; + + if (std::isnan(value)) + { + ss << (value < 0 ? "-" : "") << "NaN"; + } + else if (std::isinf(value)) + { + ss << (value < 0 ? "-" : "") << "Infinity"; + } + else + { + const auto max_digits10 = + std::numeric_limits::max_digits10; + const auto max_fractional_digits10 = max_digits10 - 1; + ss << std::scientific << std::setprecision(max_fractional_digits10) + << value; + } + return ss.str(); + } + + public: + void ReportRuns(std::vector const& reports) + { + if (reports.empty()) + { + return; + } + std::string indent(4, ' '); + std::ostream& out = GetOutputStream(); + if (!first_report_) + { + out << ",\n"; + } + first_report_ = false; + + for (auto it = reports.begin(); it != reports.end(); ++it) + { + out << indent << "{\n"; + PrintRunData(*it); + out << indent << '}'; + auto it_cp = it; + if (++it_cp != reports.end()) + { + out << ",\n"; + } + } + } + + void PrintRunData(Run const& run) + { + std::string indent(6, ' '); + std::ostream& out = GetOutputStream(); + + auto output_format = [this, &out, &indent](const std::string& label, + auto val, bool start_endl = true) + { + if (start_endl) + { + out << ",\n"; + } + out << indent << FormatKV(label, val); + }; + + output_format("name", run.benchmark_name(), false); + output_format("family_index", run.family_index); + output_format("per_family_instance_index", run.per_family_instance_index); + output_format("run_name", run.run_name.str()); + output_format("run_type", [&run]() -> const char* { + switch (run.run_type) { + case BenchmarkReporter::Run::RT_Iteration: + return 
"iteration"; + case BenchmarkReporter::Run::RT_Aggregate: + return "aggregate"; + } + BENCHMARK_UNREACHABLE(); + }()); + output_format("repetitions", run.repetitions); + if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) + { + output_format("repetition_index", run.repetition_index); + } + output_format("threads", run.threads); + if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) { + output_format("aggregate_name", run.aggregate_name); + output_format("aggregate_unit", [&run]() -> const char* { + switch (run.aggregate_unit) + { + case benchmark::StatisticUnit::kTime: + return "time"; + case benchmark::StatisticUnit::kPercentage: + return "percentage"; + } + BENCHMARK_UNREACHABLE(); + }()); + } + if (benchmark::internal::SkippedWithError == run.skipped) + { + output_format("error_occurred", true); + output_format("error_message", run.skip_message); + } + else if (benchmark::internal::SkippedWithMessage == run.skipped) + { + output_format("skipped", true); + output_format("skip_message", run.skip_message); + } + if (!run.report_big_o && !run.report_rms) + { + output_format("iterations", run.iterations); + if (run.run_type != Run::RT_Aggregate || + run.aggregate_unit == benchmark::StatisticUnit::kTime) + { + output_format("gpu_time", run.GetAdjustedRealTime()); + output_format("cpu_time", run.GetAdjustedCPUTime()); + } else + { + assert(run.aggregate_unit == benchmark::StatisticUnit::kPercentage); + output_format("gpu_time", run.real_accumulated_time); + output_format("cpu_time", run.cpu_accumulated_time); + } + output_format("time_unit", GetTimeUnitString(run.time_unit)); + } + else if (run.report_big_o) + { + output_format("cpu_coefficient", run.GetAdjustedCPUTime()); + output_format("gpu_coefficient", run.GetAdjustedRealTime()); + output_format("big_o", get_complexity(run.complexity)); + output_format("time_unit", GetTimeUnitString(run.time_unit)); + } + else if (run.report_rms) + { + output_format("rms", run.GetAdjustedCPUTime()); + } + + for (auto& c 
: run.counters) + { + if(c.first == "items_per_second") + { + // Report same name as console reporter + output_format("elements_per_second", c.second); + } + else if (c.first == "bytes_per_second") + { + // Report same name as console reporter + output_format("global_mem_bw", c.second); + const double util_bw = calculate_bw_utils(run); + if (util_bw >= 0) + { + output_format("util_bw", util_bw); + } + } + else + { + output_format(c.first, c.second); + } + } + + if (run.memory_result) + { + const benchmark::MemoryManager::Result memory_result = *run.memory_result; + output_format("allocs_per_iter", run.allocs_per_iter); + output_format("max_bytes_used", memory_result.max_bytes_used); + + if (memory_result.total_allocated_bytes != benchmark::MemoryManager::TombstoneValue) + { + output_format("total_allocated_bytes", + memory_result.total_allocated_bytes); + } + + if (memory_result.net_heap_growth != benchmark::MemoryManager::TombstoneValue) + { + output_format("net_heap_growth", memory_result.net_heap_growth); + } + } + + if (!run.report_label.empty()) + { + output_format("label", run.report_label); + } + out << '\n'; + } +}; BENCHMARK_DISABLE_DEPRECATED_WARNING -using CustomCSVReporter = benchmark::CSVReporter; +class CustomCSVReporter : public benchmark::CSVReporter +{ + private: + bool printed_header_ = false; + + std::vector elements = { + "name", + "iterations", + "gpu_time", + "cpu_time", + "time_unit", + "global_mem_bw", + "util_bw", + "elements_per_second", + "gpu_noise", + "label", + "error_occurred", + "error_message"}; + + std::string CsvEscape(const std::string& s) + { + std::string tmp; + tmp.reserve(s.size() + 2); + for (char c : s) { + switch (c) + { + case '"': + tmp += "\"\""; + break; + default: + tmp += c; + break; + } + } + return '"' + tmp + '"'; + } + + public: + void PrintHeader(const Run& /*run*/) + { + std::string str = ""; + bool first = true; + for (auto element : elements) + { + if (first) + { + first = false; + } + else + { + str += ","; 
+ } + str += element; + } + GetOutputStream() << str << "\n"; + } + + void PrintRunData(const Run& result) + { + // Report benchmark name + auto& sout = GetOutputStream(); + + if (result.skipped) + { + sout << std::string(elements.size()-3, ','); + sout << std::boolalpha << (benchmark::internal::SkippedWithError == result.skipped) << ","; + sout << CsvEscape(result.skip_message) << "\n"; + return; + } + + sout << CsvEscape(result.benchmark_name()) << ","; + + if(!result.report_big_o && !result.report_rms) + { + sout << result.iterations; + } + sout << ","; + + sout << result.GetAdjustedRealTime() << ","; + sout << result.GetAdjustedCPUTime() << ","; + + if (result.report_big_o) + { + sout << get_complexity(result.complexity); + } + else if (!result.report_rms) + { + sout << benchmark::GetTimeUnitString(result.time_unit); + } + sout << ","; + + if (result.counters.find("bytes_per_second") != result.counters.end()) + { + sout << result.counters.at("bytes_per_second"); + } + sout << ","; + + if (result.counters.find("bytes_per_second") != result.counters.end()) + { + const double bw_util = calculate_bw_utils(result); + if (bw_util >= 0) + { + sout << bw_util; + } + } + sout << ","; + + if (result.counters.find("items_per_second") != result.counters.end()) + { + sout << result.counters.at("items_per_second"); + } + sout << ","; + + if (result.counters.find("gpu_noise") != result.counters.end()) + { + sout << result.counters.at("gpu_noise"); + } + sout << ","; + + if (!result.report_label.empty()) + { + sout << CsvEscape(result.report_label); + } + + sout << ",,"; + sout << '\n'; + } + + void ReportRuns(const std::vector& reports) + { + for(const auto& run : reports) + { + // Print the header if none was printed yet + bool print_header = !printed_header_; + if(print_header) + { + printed_header_ = true; + PrintHeader(run); + } + PrintRunData(run); + } + } +}; benchmark::BenchmarkReporter* ChooseCustomReporter() { From e00ad3a7d84932d72269438d99ab255ee53c8e9b Mon Sep 17 
00:00:00 2001 From: Nick Breed Date: Thu, 15 Aug 2024 12:08:54 +0000 Subject: [PATCH 32/44] Changes for review --- benchmarks/bench_utils/custom_reporter.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/benchmarks/bench_utils/custom_reporter.hpp b/benchmarks/bench_utils/custom_reporter.hpp index 7bd008c98..ea5ec56db 100644 --- a/benchmarks/bench_utils/custom_reporter.hpp +++ b/benchmarks/bench_utils/custom_reporter.hpp @@ -67,11 +67,11 @@ static std::string FormatString(const char* msg, va_list args) { return {}; } - else if(ret < 0) - if(static_cast(ret) < size) - { - return local_buff; - } + else if(static_cast(ret) < size) + { + return local_buff; + } + // we did not provide a long enough buffer on our first attempt. size = static_cast(ret) + 1; // + 1 for the null byte std::unique_ptr buff(new char[size]); @@ -810,8 +810,7 @@ class CustomCSVReporter : public benchmark::CSVReporter sout << CsvEscape(result.report_label); } - sout << ",,"; - sout << '\n'; + sout << ",,\n"; } void ReportRuns(const std::vector& reports) @@ -832,6 +831,8 @@ class CustomCSVReporter : public benchmark::CSVReporter benchmark::BenchmarkReporter* ChooseCustomReporter() { + // benchmark::BenchmarkReporter is polymorphic as it has a virtual + // function which allows us to use dynamic_cast to detect the derived type. 
typedef benchmark::BenchmarkReporter* PtrType; PtrType default_display_reporter = benchmark::CreateDefaultDisplayReporter(); From 1cc4c8b147d758e10b7e5554e7bc5e12a6ef9aae Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 8 Aug 2024 10:28:04 +0000 Subject: [PATCH 33/44] Added hipstdpar tests --- .gitlab-ci.yml | 1 + CMakeLists.txt | 5 +- README.md | 15 +- cmake/Summary.cmake | 1 + docs/install/installing.rst | 3 +- test/CMakeLists.txt | 249 +++++++++++++++-------------- test/hipstdpar/CMakeLists.txt | 102 ++++++++++++ test/hipstdpar/test_algorithms.cpp | 171 ++++++++++++++++++++ test/hipstdpar/test_interpose.cpp | 113 +++++++++++++ 9 files changed, 533 insertions(+), 127 deletions(-) create mode 100644 test/hipstdpar/CMakeLists.txt create mode 100644 test/hipstdpar/test_algorithms.cpp create mode 100644 test/hipstdpar/test_interpose.cpp diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b83ca3b65..096ddf9a9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -74,6 +74,7 @@ copyright-date: -D CMAKE_CXX_COMPILER=hipcc -D CMAKE_BUILD_TYPE=Release -D BUILD_TEST=OFF + -D BUILD_HIPSTDPAR_TEST=OFF -D BUILD_EXAMPLE=OFF -D ROCM_DEP_ROCMCORE=OFF -D CMAKE_C_COMPILER_LAUNCHER=phc_sccache_c diff --git a/CMakeLists.txt b/CMakeLists.txt index c134b3821..1ca1ee3e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,6 +80,7 @@ endif() # Disable -Werror option(DISABLE_WERROR "Disable building with Werror" ON) option(BUILD_TEST "Build tests" OFF) +option(BUILD_HIPSTDPAR_TEST "Build hipstdpar tests" OFF) option(BUILD_EXAMPLES "Build examples" OFF) option(BUILD_BENCHMARKS "Build benchmarks" OFF) option(DOWNLOAD_ROCPRIM "Download rocPRIM and do not search for rocPRIM package" OFF) @@ -143,14 +144,14 @@ if(BUILD_TEST OR BUILD_BENCHMARKS) endif() # Tests -if(BUILD_TEST) +if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) rocm_package_setup_client_component(tests) if (ENABLE_UPSTREAM_TESTS) enable_testing() endif() # We still want the testing to be compiled to catch some errors 
#TODO: Get testing folder working with HIP on Windows - if (NOT WIN32) + if (NOT WIN32 AND BUILD_TEST) add_subdirectory(testing) endif() enable_testing() diff --git a/README.md b/README.md index 3d2445311..09017d867 100644 --- a/README.md +++ b/README.md @@ -64,13 +64,14 @@ cd rocThrust; mkdir build; cd build # Configure rocThrust, setup options for your system. # Build options: -# DISABLE_WERROR - ON by default, This flag disable the -Werror compiler flag -# BUILD_TEST - OFF by default, -# BUILD_EXAMPLES - OFF by default, -# BUILD_BENCHMARKS - OFF by default, -# DOWNLOAD_ROCPRIM - OFF by default, when ON rocPRIM will be downloaded to the build folder, -# RNG_SEED_COUNT - 0 by default, controls non-repeatable random dataset count -# PRNG_SEEDS - 1 by default, reproducible seeds to generate random data +# DISABLE_WERROR - ON by default, This flag disable the -Werror compiler flag +# BUILD_TEST - OFF by default, +# BUILD_HIPSTDPAR_TEST - OFF by default, +# BUILD_EXAMPLES - OFF by default, +# BUILD_BENCHMARKS - OFF by default, +# DOWNLOAD_ROCPRIM - OFF by default, when ON rocPRIM will be downloaded to the build folder, +# RNG_SEED_COUNT - 0 by default, controls non-repeatable random dataset count +# PRNG_SEEDS - 1 by default, reproducible seeds to generate random data # # ! IMPORTANT ! # On ROCm platform set C++ compiler to HipCC. 
You can do it by adding 'CXX=' diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 843ec35bf..62ccfbc3f 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -40,6 +40,7 @@ endif() message(STATUS " DOWNLOAD_ROCPRIM : ${DOWNLOAD_ROCPRIM}") message(STATUS " DOWNLOAD_ROCRAND : ${DOWNLOAD_ROCRAND}") message(STATUS " BUILD_TEST : ${BUILD_TEST}") + message(STATUS " BUILD_HIPSTDPAR_TEST : ${BUILD_HIPSTDPAR_TEST}") message(STATUS " BUILD_EXAMPLES : ${BUILD_EXAMPLES}") message(STATUS " BUILD_BENCHMARKS : ${BUILD_BENCHMARKS}") message(STATUS " BUILD_ADDRESS_SANITIZER : ${BUILD_ADDRESS_SANITIZER}") diff --git a/docs/install/installing.rst b/docs/install/installing.rst index acac22b62..a4a2fbd12 100644 --- a/docs/install/installing.rst +++ b/docs/install/installing.rst @@ -98,7 +98,8 @@ For a more elaborate installation process, rocThrust can be built manually using The following configuration options are available, in addition to the built-in CMake options: * ``DISABLE_WERROR`` disables passing ``-Werror`` to the compiler during the build. ``ON`` by default. -* ``BUILD_TEST`` controls whether to build the rocThrust tests. ``OFF`` by default. +* ``BUILD_TEST`` controls whether to build the rocThrust and hipstdpar tests. ``OFF`` by default. +* ``BUILD_HIPSTDPAR_TEST`` controls whether to build the hipstdpar tests. Enabling this option is only necessary when BUILD_TEST is OFF. ``OFF`` by default. * ``BUILD_BENCHMARK`` controls whether to build the rocThrust benchmarks. ``OFF`` by default. * ``BUILD_EXAMPLES`` controls whether to build rocThrust examples. ``OFF`` by default. * ``DOWNLOAD_ROCPRIM`` controls whether to force downloading rocPRIM, regardless of whether rocPRIM is currently installed. Defaults to ``OFF``. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5c2f9f8bf..7f038ced0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -123,123 +123,138 @@ endfunction() # Tests # **************************************************************************** -add_rocthrust_test("adjacent_difference") -add_rocthrust_test("advance") -add_rocthrust_test("allocator") -add_rocthrust_test("allocator_aware_policies") -add_rocthrust_test("async_copy") -add_rocthrust_test("async_reduce") -add_rocthrust_test("async_scan") -add_rocthrust_test("async_sort") -add_rocthrust_test("async_transform") -add_rocthrust_test("binary_search") -add_rocthrust_test("binary_search_descending") -add_rocthrust_test("binary_search_vector") -add_rocthrust_test("binary_search_vector_descending") -add_rocthrust_test("complex") -add_rocthrust_test("complex_transform") -add_rocthrust_test("constant_iterator") -add_rocthrust_test("copy") -add_rocthrust_test("copy_n") -add_rocthrust_test("count") -add_rocthrust_test("counting_iterator") -add_rocthrust_test("dereference") -add_rocthrust_test("device_delete") -add_rocthrust_test("device_ptr") -add_rocthrust_test("device_reference") -add_rocthrust_test("discard_iterator") -add_rocthrust_test("distance") -add_rocthrust_test("equal") -add_rocthrust_test("fill") -add_rocthrust_test("find") -add_rocthrust_test("for_each") -add_rocthrust_test("gather") -add_rocthrust_test("generate") -add_rocthrust_test("inner_product") -add_rocthrust_test("is_sorted") -add_rocthrust_test("is_partitioned") -add_rocthrust_test("is_sorted_until") -add_rocthrust_test("max_element") -add_rocthrust_test("memory") -add_rocthrust_test("merge") -add_rocthrust_test("merge_by_key") -add_rocthrust_test("min_element") -add_rocthrust_test("minmax_element") -add_rocthrust_test("mismatch") -add_rocthrust_test("mr_disjoint_pool") -add_rocthrust_test("mr_new") -add_rocthrust_test("mr_pool") -add_rocthrust_test("mr_pool_options") -add_rocthrust_test("optional") 
-add_rocthrust_test("pair") -add_rocthrust_test("pair_reduce") -add_rocthrust_test("pair_scan") -add_rocthrust_test("pair_sort") -add_rocthrust_test("pair_transform") -add_rocthrust_test("parallel_for") -add_rocthrust_test("partition") -add_rocthrust_test("partition_point") -add_rocthrust_test("permutation_iterator") -add_rocthrust_test("random") -add_rocthrust_test("reduce") -add_rocthrust_test("reduce_by_key") -add_rocthrust_test("remove") -add_rocthrust_test("replace") -add_rocthrust_test("reproducibility") -add_rocthrust_test("reverse_iterator") -add_rocthrust_test("set_difference") -add_rocthrust_test("set_difference_by_key") -add_rocthrust_test("set_difference_by_key_descending") -add_rocthrust_test("set_difference_descending") -add_rocthrust_test("set_intersection") -add_rocthrust_test("set_intersection_by_key") -add_rocthrust_test("set_intersection_by_key_descending") -add_rocthrust_test("set_intersection_descending") -add_rocthrust_test("set_intersection_key_value") -add_rocthrust_test("set_symmetric_difference") -add_rocthrust_test("set_symmetric_difference_by_key_descending") -add_rocthrust_test("set_symmetric_difference_by_key") -add_rocthrust_test("shuffle") -add_rocthrust_test("scan") -add_rocthrust_test("scan_by_key") -add_rocthrust_test("scatter") -add_rocthrust_test("sequence") -add_rocthrust_test("stable_sort") -add_rocthrust_test("stable_sort_by_key") -add_rocthrust_test("stable_sort_by_key_large_keys_and_values") -add_rocthrust_test("stable_sort_by_key_large_keys") -add_rocthrust_test("stable_sort_by_key_large_values") -add_rocthrust_test("stable_sort_large") -add_rocthrust_test("sort") -add_rocthrust_test("sort_by_key") -add_rocthrust_test("sort_by_key_variable_bits") -add_rocthrust_test("sort_permutation_iterator") -add_rocthrust_test("sort_variables") -add_rocthrust_test("swap_ranges") -add_rocthrust_test("tabulate") -add_rocthrust_test("transform") -add_rocthrust_test("transform_iterator") -add_rocthrust_test("transform_reduce") 
-add_rocthrust_test("transform_scan") -add_rocthrust_test("tuple") -add_rocthrust_test("tuple_reduce") -add_rocthrust_test("tuple_sort") -add_rocthrust_test("tuple_transform") -add_rocthrust_test("uninitialized_copy") -add_rocthrust_test("uninitialized_fill") -add_rocthrust_test("unique") -add_rocthrust_test("unique_by_key") -add_rocthrust_test("universal_memory") -add_rocthrust_test("vector") -add_rocthrust_test("vector_allocators") -add_rocthrust_test("vector_insert") -add_rocthrust_test("vector_manipulation") -add_rocthrust_test("zip_iterator") -add_rocthrust_test("zip_iterator_reduce") -add_rocthrust_test("zip_iterator_scan") -add_rocthrust_test("zip_iterator_sort") -add_rocthrust_test("zip_iterator_sort_by_key") -add_rocthrust_test("zip_iterator_reduce_by_key") +# rocThrust tests +if(BUILD_TEST) + add_rocthrust_test("adjacent_difference") + add_rocthrust_test("advance") + add_rocthrust_test("allocator") + add_rocthrust_test("allocator_aware_policies") + add_rocthrust_test("async_copy") + add_rocthrust_test("async_reduce") + add_rocthrust_test("async_scan") + add_rocthrust_test("async_sort") + add_rocthrust_test("async_transform") + add_rocthrust_test("binary_search") + add_rocthrust_test("binary_search_descending") + add_rocthrust_test("binary_search_vector") + add_rocthrust_test("binary_search_vector_descending") + add_rocthrust_test("complex") + add_rocthrust_test("complex_transform") + add_rocthrust_test("constant_iterator") + add_rocthrust_test("copy") + add_rocthrust_test("copy_n") + add_rocthrust_test("count") + add_rocthrust_test("counting_iterator") + add_rocthrust_test("dereference") + add_rocthrust_test("device_delete") + add_rocthrust_test("device_ptr") + add_rocthrust_test("device_reference") + add_rocthrust_test("discard_iterator") + add_rocthrust_test("distance") + add_rocthrust_test("equal") + add_rocthrust_test("fill") + add_rocthrust_test("find") + add_rocthrust_test("for_each") + add_rocthrust_test("gather") + add_rocthrust_test("generate") + 
add_rocthrust_test("inner_product") + add_rocthrust_test("is_sorted") + add_rocthrust_test("is_partitioned") + add_rocthrust_test("is_sorted_until") + add_rocthrust_test("max_element") + add_rocthrust_test("memory") + add_rocthrust_test("merge") + add_rocthrust_test("merge_by_key") + add_rocthrust_test("min_element") + add_rocthrust_test("minmax_element") + add_rocthrust_test("mismatch") + add_rocthrust_test("mr_disjoint_pool") + add_rocthrust_test("mr_new") + add_rocthrust_test("mr_pool") + add_rocthrust_test("mr_pool_options") + add_rocthrust_test("optional") + add_rocthrust_test("pair") + add_rocthrust_test("pair_reduce") + add_rocthrust_test("pair_scan") + add_rocthrust_test("pair_sort") + add_rocthrust_test("pair_transform") + add_rocthrust_test("parallel_for") + add_rocthrust_test("partition") + add_rocthrust_test("partition_point") + add_rocthrust_test("permutation_iterator") + add_rocthrust_test("random") + add_rocthrust_test("reduce") + add_rocthrust_test("reduce_by_key") + add_rocthrust_test("remove") + add_rocthrust_test("replace") + add_rocthrust_test("reproducibility") + add_rocthrust_test("reverse_iterator") + add_rocthrust_test("set_difference") + add_rocthrust_test("set_difference_by_key") + add_rocthrust_test("set_difference_by_key_descending") + add_rocthrust_test("set_difference_descending") + add_rocthrust_test("set_intersection") + add_rocthrust_test("set_intersection_by_key") + add_rocthrust_test("set_intersection_by_key_descending") + add_rocthrust_test("set_intersection_descending") + add_rocthrust_test("set_intersection_key_value") + add_rocthrust_test("set_symmetric_difference") + add_rocthrust_test("set_symmetric_difference_by_key_descending") + add_rocthrust_test("set_symmetric_difference_by_key") + add_rocthrust_test("shuffle") + add_rocthrust_test("scan") + add_rocthrust_test("scan_by_key") + add_rocthrust_test("scatter") + add_rocthrust_test("sequence") + add_rocthrust_test("stable_sort") + add_rocthrust_test("stable_sort_by_key") + 
add_rocthrust_test("stable_sort_by_key_large_keys_and_values") + add_rocthrust_test("stable_sort_by_key_large_keys") + add_rocthrust_test("stable_sort_by_key_large_values") + add_rocthrust_test("stable_sort_large") + add_rocthrust_test("sort") + add_rocthrust_test("sort_by_key") + add_rocthrust_test("sort_by_key_variable_bits") + add_rocthrust_test("sort_permutation_iterator") + add_rocthrust_test("sort_variables") + add_rocthrust_test("swap_ranges") + add_rocthrust_test("tabulate") + add_rocthrust_test("transform") + add_rocthrust_test("transform_iterator") + add_rocthrust_test("transform_reduce") + add_rocthrust_test("transform_scan") + add_rocthrust_test("tuple") + add_rocthrust_test("tuple_reduce") + add_rocthrust_test("tuple_sort") + add_rocthrust_test("tuple_transform") + add_rocthrust_test("uninitialized_copy") + add_rocthrust_test("uninitialized_fill") + add_rocthrust_test("unique") + add_rocthrust_test("unique_by_key") + add_rocthrust_test("universal_memory") + add_rocthrust_test("vector") + add_rocthrust_test("vector_allocators") + add_rocthrust_test("vector_insert") + add_rocthrust_test("vector_manipulation") + add_rocthrust_test("zip_iterator") + add_rocthrust_test("zip_iterator_reduce") + add_rocthrust_test("zip_iterator_scan") + add_rocthrust_test("zip_iterator_sort") + add_rocthrust_test("zip_iterator_sort_by_key") + add_rocthrust_test("zip_iterator_reduce_by_key") +endif() + +# hipstdpar tests +if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) + if(WIN32) + message( + STATUS + "Not building hipstdpar tests, currently they do not support Windows." + ) + else() + add_subdirectory(hipstdpar) + endif() +endif() rocm_install( FILES "${INSTALL_TEST_FILE}" diff --git a/test/hipstdpar/CMakeLists.txt b/test/hipstdpar/CMakeLists.txt new file mode 100644 index 000000000..3fce6ecba --- /dev/null +++ b/test/hipstdpar/CMakeLists.txt @@ -0,0 +1,102 @@ +# ######################################################################## +# Copyright 2024 Advanced Micro Devices, Inc. 
+# ######################################################################## + +function(add_hipstdpar_test TEST TEST_TYPE INTERPOSE_ALLOC) + set(TEST_SOURCE "test_${TEST}.cpp") + set(TEST_TARGET "${TEST}_${TEST_TYPE}.hip") + + add_executable(${TEST_TARGET} ${TEST_SOURCE}) + target_compile_options(${TEST_TARGET} + PRIVATE + --hipstdpar + --hipstdpar-path=${HIPSTDPAR_LOCATION} + --hipstdpar-thrust-path=${THRUST_LOCATION}) + if(INTERPOSE_ALLOC) + target_compile_options(${TEST_TARGET} + PRIVATE + --hipstdpar-interpose-alloc + ) + endif() + target_link_libraries(${TEST_TARGET} + PRIVATE + --hipstdpar + TBB::tbb + Threads::Threads + ) + + if (NOT WIN32) + foreach(gpu_target ${GPU_TARGETS}) + target_link_libraries(${TEST_TARGET} + PRIVATE + --offload-arch=${gpu_target} + ) + endforeach() + endif() + set_target_properties(${TEST_TARGET} + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/test/hipstdpar" + ) + if(AMDGPU_TEST_TARGETS) + foreach(AMDGPU_TARGET IN LISTS AMDGPU_TEST_TARGETS) + add_test("${AMDGPU_TARGET}-${TEST_TARGET}" ${TEST_TARGET}) + set_tests_properties("${AMDGPU_TARGET}-${TEST_TARGET}" + PROPERTIES + RESOURCE_GROUPS "1,${AMDGPU_TARGET}:1" + LABELS "hip;${AMDGPU_TARGET}" + ) + endforeach() + else() + add_relative_test(${TEST_TARGET} ${TEST_TARGET}) + set_tests_properties(${TEST_TARGET} + PROPERTIES + LABELS "hip" + ) + endif() + + rocm_install(TARGETS ${TEST_TARGET} COMPONENT tests) + if (WIN32 AND NOT DEFINED DLLS_COPIED) + set(DLLS_COPIED "YES") + set(DLLS_COPIED ${DLLS_COPIED} PARENT_SCOPE) + # for now adding in all .dll as dependency chain is not cmake based on win32 + file( GLOB third_party_dlls + LIST_DIRECTORIES ON + CONFIGURE_DEPENDS + ${HIP_DIR}/bin/*.dll + ${CMAKE_SOURCE_DIR}/rtest.* + ) + foreach( file_i ${third_party_dlls}) + add_custom_command( TARGET ${TEST_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${file_i} ${PROJECT_BINARY_DIR}/test ) + endforeach( file_i ) + endif() +endfunction() + +# Make sure that 
hipstdpar tests are compiled for C++17, without changing the global state +set(ROCTHRUST_CMAKE_CXX_STANDARD ${CMAKE_CXX_STANDARD}) +set(CMAKE_CXX_STANDARD 17) + +# Dependencies +find_package(TBB QUIET) +if(NOT TARGET TBB::tbb AND NOT TARGET tbb) + message(STATUS "Thread Building Blocks not found. Fetching...") + FetchContent_Declare( + thread-building-blocks + GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git + GIT_TAG 1c4c93fc5398c4a1acb3492c02db4699f3048dea # v2021.13.0 + ) + FetchContent_MakeAvailable(thread-building-blocks) +else() + find_package(TBB REQUIRED) +endif() +find_package(Threads REQUIRED) + +# Define where to find rocThrust and hipstdpar headers +set(THRUST_LOCATION ${PROJECT_SOURCE_DIR}) +set(HIPSTDPAR_LOCATION ${THRUST_LOCATION}/thrust/system/hip/hipstdpar) + +# Add tests +add_hipstdpar_test("algorithms" "compile" OFF) +add_hipstdpar_test("interpose" "compile" ON) + +# Restore global state +set(CMAKE_CXX_STANDARD ${ROCTHRUST_CMAKE_CXX_STANDARD}) diff --git a/test/hipstdpar/test_algorithms.cpp b/test/hipstdpar/test_algorithms.cpp new file mode 100644 index 000000000..bbe7185e6 --- /dev/null +++ b/test/hipstdpar/test_algorithms.cpp @@ -0,0 +1,171 @@ +// MIT License +// +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include +#include +#include + +int main() +{ + using namespace std; + + vector v {}; + + adjacent_difference(execution::par_unseq, cbegin(v), cend(v), begin(v)); + adjacent_difference(execution::par_unseq, cbegin(v), cend(v), begin(v), minus<> {}); + adjacent_find(execution::par_unseq, cbegin(v), cend(v)); + adjacent_find(execution::par_unseq, cbegin(v), cend(v), equal_to<> {}); + all_of(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + copy(execution::par_unseq, cbegin(v), cend(v), begin(v)); + copy_if(execution::par_unseq, cbegin(v), cend(v), begin(v), logical_not<> {}); + copy_n(execution::par_unseq, cbegin(v), size(v), begin(v)); + count(execution::par_unseq, cbegin(v), cend(v), 42); + count_if(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + destroy(execution::par_unseq, begin(v), end(v)); + destroy_n(execution::par_unseq, begin(v), size(v)); + equal(execution::par_unseq, cbegin(v), cend(v), cbegin(v)); + equal(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + equal(execution::par_unseq, cbegin(v), cend(v), cbegin(v), equal_to<> {}); + equal(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + exclusive_scan(execution::par_unseq, cbegin(v), cend(v), begin(v), 0); + exclusive_scan(execution::par_unseq, cbegin(v), cend(v), begin(v), 0, plus<> {}); + fill(execution::par_unseq, begin(v), end(v), 0); + fill_n(execution::par_unseq, begin(v), size(v), 0); + find(execution::par_unseq, cbegin(v), cend(v), 42); + 
find_end(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + find_end(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + find_first_of(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + find_first_of(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + find_if(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + find_if_not(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + for_each(execution::par_unseq, cbegin(v), cend(v), [](auto&&) {}); + for_each_n(execution::par_unseq, cbegin(v), size(v), [](auto&&) {}); + generate(execution::par_unseq, begin(v), end(v), []() { return 42; }); + generate_n(execution::par_unseq, begin(v), size(v), []() { return 42; }); + includes(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + includes(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + inclusive_scan(execution::par_unseq, cbegin(v), cend(v), begin(v)); + inclusive_scan(execution::par_unseq, cbegin(v), cend(v), begin(v), plus<> {}); + inclusive_scan(execution::par_unseq, cbegin(v), cend(v), begin(v), plus<> {}, 0); + inplace_merge(execution::par_unseq, begin(v), begin(v), end(v)); + inplace_merge(execution::par_unseq, begin(v), begin(v), end(v), less<> {}); + is_heap(execution::par_unseq, cbegin(v), cend(v)); + is_heap(execution::par_unseq, cbegin(v), cend(v), less<> {}); + is_heap_until(execution::par_unseq, cbegin(v), cend(v)); + is_heap_until(execution::par_unseq, cbegin(v), cend(v), less<> {}); + is_partitioned(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + is_sorted(execution::par_unseq, cbegin(v), cend(v)); + is_sorted(execution::par_unseq, cbegin(v), cend(v), less<> {}); + is_sorted_until(execution::par_unseq, cbegin(v), cend(v)); + is_sorted_until(execution::par_unseq, cbegin(v), cend(v), less<> {}); + lexicographical_compare(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + 
lexicographical_compare( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), less<> {}); + max_element(execution::par_unseq, cbegin(v), cend(v)); + max_element(execution::par_unseq, cbegin(v), cend(v), less<> {}); + merge(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v)); + merge(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v), less<> {}); + min_element(execution::par_unseq, cbegin(v), cend(v)); + min_element(execution::par_unseq, cbegin(v), cend(v), less<> {}); + minmax_element(execution::par_unseq, cbegin(v), cend(v)); + minmax_element(execution::par_unseq, cbegin(v), cend(v), less<> {}); + mismatch(execution::par_unseq, cbegin(v), cend(v), cbegin(v)); + mismatch(execution::par_unseq, cbegin(v), cend(v), cbegin(v), equal_to<> {}); + mismatch(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + mismatch(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + move(execution::par_unseq, cbegin(v), cend(v), begin(v)); + none_of(execution::par_unseq, cbegin(v), cend(v), logical_not<> {}); + nth_element(execution::par_unseq, begin(v), begin(v), end(v)); + nth_element(execution::par_unseq, begin(v), begin(v), end(v), less<> {}); + partial_sort(execution::par_unseq, begin(v), begin(v), end(v)); + partial_sort(execution::par_unseq, begin(v), begin(v), end(v), less<> {}); + partial_sort_copy(execution::par_unseq, cbegin(v), cend(v), begin(v), end(v)); + partial_sort_copy(execution::par_unseq, cbegin(v), cend(v), begin(v), end(v), less<> {}); + partition(execution::par_unseq, begin(v), end(v), logical_not<> {}); + partition_copy(execution::par_unseq, cbegin(v), cend(v), begin(v), begin(v), logical_not<> {}); + reduce(execution::par_unseq, cbegin(v), cend(v)); + reduce(execution::par_unseq, cbegin(v), cend(v), 0); + reduce(execution::par_unseq, cbegin(v), cend(v), 0, plus<> {}); + remove(execution::par_unseq, begin(v), end(v), 42); + remove_copy(execution::par_unseq, cbegin(v), 
cend(v), begin(v), 42); + remove_copy_if(execution::par_unseq, cbegin(v), cend(v), begin(v), logical_not<> {}); + remove_if(execution::par_unseq, begin(v), end(v), logical_not<> {}); + replace(execution::par_unseq, begin(v), end(v), 42, 69); + replace_copy(execution::par_unseq, cbegin(v), cend(v), begin(v), 42, 69); + replace_copy_if(execution::par_unseq, cbegin(v), cend(v), begin(v), logical_not<> {}, 42); + replace_if(execution::par_unseq, begin(v), end(v), logical_not<> {}, 42); + reverse(execution::par_unseq, begin(v), end(v)); + reverse_copy(execution::par_unseq, cbegin(v), cend(v), begin(v)); + rotate(execution::par_unseq, begin(v), begin(v), end(v)); + rotate_copy(execution::par_unseq, cbegin(v), cbegin(v), cend(v), begin(v)); + search(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v)); + search(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), equal_to<> {}); + search_n(execution::par_unseq, cbegin(v), cend(v), size(v), 42); + search_n(execution::par_unseq, cbegin(v), cend(v), size(v), 42, equal_to<> {}); + set_difference(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v)); + set_difference( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v), equal_to<> {}); + set_intersection(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v)); + set_intersection( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v), equal_to<> {}); + set_symmetric_difference( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v)); + set_symmetric_difference( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v), equal_to<> {}); + set_union(execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v)); + set_union( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), cend(v), begin(v), equal_to<> {}); + sort(execution::par_unseq, begin(v), end(v)); + sort(execution::par_unseq, begin(v), end(v), less<> {}); + 
stable_partition(execution::par_unseq, begin(v), end(v), logical_not<> {}); + stable_sort(execution::par_unseq, begin(v), end(v)); + stable_sort(execution::par_unseq, begin(v), end(v), less<> {}); + swap_ranges(execution::par_unseq, begin(v), end(v), begin(v)); + transform(execution::par_unseq, cbegin(v), cend(v), begin(v), logical_not<> {}); + transform(execution::par_unseq, cbegin(v), cend(v), cbegin(v), begin(v), plus<> {}); + transform_exclusive_scan( + execution::par_unseq, cbegin(v), cend(v), begin(v), 0, plus<> {}, logical_not<> {}); + transform_inclusive_scan( + execution::par_unseq, cbegin(v), cend(v), begin(v), plus<> {}, logical_not<> {}); + transform_inclusive_scan( + execution::par_unseq, cbegin(v), cend(v), begin(v), plus<> {}, logical_not<> {}, 42); + transform_reduce(execution::par_unseq, cbegin(v), cend(v), cbegin(v), 42); + transform_reduce(execution::par_unseq, cbegin(v), cend(v), 42, plus<> {}, logical_not<> {}); + transform_reduce( + execution::par_unseq, cbegin(v), cend(v), cbegin(v), 42, plus<> {}, minus<> {}); + uninitialized_copy(execution::par_unseq, cbegin(v), cend(v), begin(v)); + uninitialized_copy_n(execution::par_unseq, cbegin(v), size(v), begin(v)); + uninitialized_default_construct(execution::par_unseq, begin(v), end(v)); + uninitialized_default_construct_n(execution::par_unseq, begin(v), size(v)); + uninitialized_fill(execution::par_unseq, begin(v), end(v), 42); + uninitialized_fill_n(execution::par_unseq, begin(v), size(v), 42); + uninitialized_move(execution::par_unseq, begin(v), end(v), begin(v)); + uninitialized_move_n(execution::par_unseq, begin(v), size(v), begin(v)); + uninitialized_value_construct(execution::par_unseq, begin(v), end(v)); + uninitialized_value_construct_n(execution::par_unseq, begin(v), size(v)); + unique(execution::par_unseq, begin(v), end(v)); + unique(execution::par_unseq, begin(v), end(v), equal_to<> {}); + unique_copy(execution::par_unseq, cbegin(v), cend(v), begin(v)); + 
unique_copy(execution::par_unseq, cbegin(v), cend(v), begin(v), equal_to<> {}); + + return EXIT_SUCCESS; +} diff --git a/test/hipstdpar/test_interpose.cpp b/test/hipstdpar/test_interpose.cpp new file mode 100644 index 000000000..6f6d18bc6 --- /dev/null +++ b/test/hipstdpar/test_interpose.cpp @@ -0,0 +1,113 @@ +// MIT License +// +// Copyright (c) 2023-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include +#include +#include +#include + +extern "C" void* __libc_calloc(std::size_t, std::size_t); +extern "C" void __libc_cfree(void*); +extern "C" void __libc_free(void*); +extern "C" void* __libc_malloc(std::size_t); +extern "C" void* __libc_memalign(std::size_t, std::size_t); +extern "C" void* __libc_realloc(void*, std::size_t); +extern "C" int __posix_memalign(void**, std::size_t, std::size_t); + +int main() +{ + try + { + if(auto p = std::aligned_alloc(8u, 42)) + std::free(p); + if(auto p = std::calloc(1, 42)) + std::free(p); + if(auto p = std::malloc(42)) + std::free(p); + if(auto p = memalign(8, 42)) + std::free(p); + if(void* p; posix_memalign(&p, 8, 42) == 0) + std::free(p); + if(auto p = std::realloc(std::malloc(42), 42)) + std::free(p); + if(auto p = reallocarray(std::calloc(1, 42), 1, 42)) + std::free(p); + if(auto p = new std::uint8_t) + delete p; + if(auto p = new(std::align_val_t {8}) std::uint8_t) + { + ::operator delete(p, std::align_val_t {8}); + } + if(auto p = new(std::nothrow) std::uint8_t) + delete p; + if(auto p = new(std::align_val_t {8}, std::nothrow) std::uint8_t) + { + ::operator delete(p, std::align_val_t {8}); + } + if(auto p = new std::uint8_t[42]) + delete[] p; + if(auto p = new(std::align_val_t {8}) std::uint8_t[42]) + { + ::operator delete[](p, std::align_val_t {8}); + } + if(auto p = new(std::nothrow) std::uint8_t[42]) + delete[] p; + if(auto p = new(std::align_val_t {8}, std::nothrow) std::uint8_t[42]) + { + ::operator delete[](p, std::align_val_t {8}); + } + if(auto p = __builtin_calloc(1, 42)) + __builtin_free(p); + if(auto p = __builtin_malloc(42)) + __builtin_free(p); + if(auto p = __builtin_operator_new(42)) + __builtin_operator_delete(p); + if(auto p = __builtin_operator_new(42, std::align_val_t {8})) + { + __builtin_operator_delete(p, std::align_val_t {8}); + } + if(auto p = __builtin_operator_new(42, std::nothrow)) + { + __builtin_operator_delete(p); + } + if(auto p = __builtin_operator_new(42, std::align_val_t {8}, 
std::nothrow)) + { + __builtin_operator_delete(p, std::align_val_t {8}); + } + if(auto p = __builtin_realloc(__builtin_malloc(42), 41)) + { + __builtin_free(p); + } + if(auto p = __libc_calloc(1, 42)) + __libc_free(p); + if(auto p = __libc_malloc(42)) + __libc_free(p); + if(auto p = __libc_memalign(8, 42)) + __libc_free(p); + } + catch(...) + { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} From 568f6f98f0daa8b0e070c548189d1fa45e462355 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 8 Aug 2024 10:28:04 +0000 Subject: [PATCH 34/44] Relocated our ParallelSTL additions --- thrust/system/hip/hipstdpar/hipstdpar_lib.hpp | 292 +++++++++++++++++- 1 file changed, 289 insertions(+), 3 deletions(-) diff --git a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp index 2c1b6ad10..b60958bb7 100644 --- a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp +++ b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp @@ -2278,15 +2278,301 @@ // END NONE_OF // BEGIN NTH_ELEMENT - // TODO: UNIMPLEMENTED IN THRUST + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt nth, + KeysIt last, + CompareOp compare_op) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + std::nth_element(std::execution::par, first, nth, last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt nth, + KeysIt last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const size_t n = static_cast(thrust::distance(first, nth)); + + if(count == 0) + { + return; + } + + auto policy = 
thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::nth_element( + nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); + // Allocate temporary storage. + thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::nth_element( + ptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "nth_element: failed to synchronize"); + } + + template ()>* = nullptr> + inline void + nth_element(execution::parallel_unsequenced_policy, KeysIt first, KeysIt nth, KeysIt last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + + std::nth_element(std::execution::par, first, nth, last); + } + + template ()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy policy, + KeysIt first, + KeysIt nth, + KeysIt last) + { + typedef typename thrust::iterator_value::type item_type; + std::nth_element(policy, first, nth, last, thrust::less()); + } // END NTH_ELEMENT // BEGIN PARTIAL_SORT - // TODO: UNIMPLEMENTED IN THRUST + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last, + CompareOp compare_op) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + 
std::partial_sort(std::execution::par, first, middle, last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const size_t n = static_cast(thrust::distance(first, middle)); + + if(count == 0 || n == 0) + { + return; + } + + const size_t n_index = n - 1; + + auto policy = thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::partial_sort( + nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); + + // Allocate temporary storage. + thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::partial_sort( + ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "partial_sort: failed to synchronize"); + } + + template ()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + + std::partial_sort(std::execution::par, first, middle, last); + } + + template ()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy policy, + KeysIt first, + KeysIt middle, + KeysIt last) + { + typedef typename thrust::iterator_value::type item_type; + std::partial_sort(policy, first, 
middle, last, thrust::less()); + } // END PARTIAL_SORT // BEGIN PARTIAL_SORT_COPY - // TODO: UNIMPLEMENTED IN THRUST + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last, + CompareOp compare_op) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + std::partial_sort(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const size_t d_count = static_cast(thrust::distance(d_first, d_last)); + + if(count == 0 || d_count == 0) + { + return; + } + + const size_t d_index = d_count - 1; + + auto policy = thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::partial_sort_copy(nullptr, + storage_size, + first, + d_first, + d_index, + count, + compare_op, + stream, + debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::partial_sort_copy( + ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "partial_sort_copy: failed to synchronize"); + } + + template ()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + + std::partial_sort_copy(std::execution::par, first, last, d_first, d_last); + } + + template ()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last) + { + typedef typename thrust::iterator_value::type item_type; + std::partial_sort_copy(policy, first, last, d_first, d_last, thrust::less()); + } // END PARTIAL_SORT_COPY // BEGIN PARTITION From f584551b8b2206fa9f1ce814422d0b16b987ad2f Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Thu, 8 Aug 2024 12:29:40 +0000 Subject: [PATCH 35/44] Fixed several naming issues --- thrust/system/hip/hipstdpar/hipstdpar_lib.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp index b60958bb7..876e37d15 100644 --- a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp +++ b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar_lib.hpp +/*! 
\file thrust/system/hip/hipstdpar/hipstdpar_lib.hpp * \brief Implementation detail forwarding header for HIPSTDPAR. */ @@ -1241,7 +1241,7 @@ !::hipstd::is_offloadable_iterator() || !::hipstd::is_offloadable_callable()>* = nullptr> inline - void generate_n(execution::parallel_unsequenced_policy, I f, I l, G g) + void generate(execution::parallel_unsequenced_policy, I f, I l, G g) { if constexpr (!::hipstd::is_offloadable_iterator()) { ::hipstd::unsupported_iterator_category< @@ -1252,7 +1252,7 @@ } return - ::std::generate_n(::std::execution::par, f, l, ::std::move(g)); + ::std::generate(::std::execution::par, f, l, ::std::move(g)); } // END GENERATE @@ -2487,7 +2487,7 @@ hipstd::unsupported_callable_type(); } - std::partial_sort(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); + std::partial_sort_copy(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); } template Date: Fri, 9 Aug 2024 15:54:08 +0000 Subject: [PATCH 36/44] Added missing unimplemented algorithms --- thrust/system/hip/hipstdpar/hipstdpar_lib.hpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp index 876e37d15..37a650d3a 100644 --- a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp +++ b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp @@ -3116,6 +3116,14 @@ // TODO: UNIMPLEMENTED IN THRUST // END ROTATE_COPY + // BEGIN SEARCH + // TODO: UNIMPLEMENTED IN THRUST + // END SEARCH + + // BEGIN SEARCH_N + // TODO: UNIMPLEMENTED IN THRUST + // END SEARCH_N + // BEGIN SET_DIFFERENCE template< typename I0, @@ -3512,6 +3520,14 @@ } // END SET_UNION + // BEGIN SHIFT_LEFT + // TODO: UNIMPLEMENTED IN THRUST + // END SHIFT_LEFT + + // BEGIN SHIFT_RIGHT + // TODO: UNIMPLEMENTED IN THRUST + // END SHIFT_RIGHT + // BEGIN SORT template< typename I, From 8aff938930142937a527c1d54bc202ba57c26183 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Fri, 9 Aug 
2024 15:34:48 +0000 Subject: [PATCH 37/44] Split hipstdpar_lib.hpp --- thrust/system/hip/hipstdpar/hipstdpar_lib.hpp | 4908 +---------------- thrust/system/hip/hipstdpar/impl/batch.hpp | 107 + thrust/system/hip/hipstdpar/impl/copy.hpp | 157 + .../system/hip/hipstdpar/impl/generation.hpp | 161 + thrust/system/hip/hipstdpar/impl/heap.hpp | 39 + thrust/system/hip/hipstdpar/impl/hipstd.hpp | 89 + .../hipstdpar/impl/interpose_allocations.hpp | 217 + .../impl/lexicographical_comparison.hpp | 138 + thrust/system/hip/hipstdpar/impl/merge.hpp | 135 + thrust/system/hip/hipstdpar/impl/min_max.hpp | 213 + thrust/system/hip/hipstdpar/impl/numeric.hpp | 795 +++ .../hip/hipstdpar/impl/order_changing.hpp | 101 + .../hip/hipstdpar/impl/partitioning.hpp | 201 + thrust/system/hip/hipstdpar/impl/removing.hpp | 295 + thrust/system/hip/hipstdpar/impl/search.hpp | 683 +++ thrust/system/hip/hipstdpar/impl/set.hpp | 514 ++ thrust/system/hip/hipstdpar/impl/sorting.hpp | 559 ++ thrust/system/hip/hipstdpar/impl/swap.hpp | 63 + .../hip/hipstdpar/impl/transformation.hpp | 296 + .../hip/hipstdpar/impl/uninitialized.hpp | 389 ++ 20 files changed, 5174 insertions(+), 4886 deletions(-) create mode 100644 thrust/system/hip/hipstdpar/impl/batch.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/copy.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/generation.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/heap.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/hipstd.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/merge.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/min_max.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/numeric.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/order_changing.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/partitioning.hpp create mode 100644 
thrust/system/hip/hipstdpar/impl/removing.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/search.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/set.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/sorting.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/swap.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/transformation.hpp create mode 100644 thrust/system/hip/hipstdpar/impl/uninitialized.hpp diff --git a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp index 37a650d3a..20d0e9496 100644 --- a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp +++ b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp @@ -13,4897 +13,33 @@ */ /*! \file thrust/system/hip/hipstdpar/hipstdpar_lib.hpp - * \brief Implementation detail forwarding header for HIPSTDPAR. + * \brief Forwarding header for HIPSTDPAR. */ #pragma once #if defined(__HIPSTDPAR__) - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - #include - - #include - #include - #include - #include - #include - #include - - #if defined(__HIPSTDPAR_INTERPOSE_ALLOC__) - #include - - #include - #include - #include - #include - - namespace hipstd - { - struct Header { - void* alloc_ptr; - std::size_t size; - std::size_t align; - }; - - inline std::pmr::synchronized_pool_resource heap{ - std::pmr::pool_options{0u, 15u * 1024u}, - []() { - static class final : public std::pmr::memory_resource { - // TODO: add exception handling - void* do_allocate(std::size_t n, std::size_t a) override - { - void* r{}; - hipMallocManaged(&r, n); - - return r; - } - - void do_deallocate( - void* p, std::size_t, std::size_t) override - { - hipFree(p); - } - - bool do_is_equal( - const 
std::pmr::memory_resource& x) - const noexcept override - { - return dynamic_cast(&x); - } - } r; - - return &r; - }()}; - } // Namespace hipstd. - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_aligned_alloc(std::size_t a, std::size_t n) - { // TODO: tidy up, revert to using std. - auto m = n + sizeof(hipstd::Header) + a - 1; - - auto r = hipstd::heap.allocate(m, a); - - if (!r) return r; - - const auto h = static_cast(r) + 1; - const auto p = (reinterpret_cast(h) + a - 1) & -a; - reinterpret_cast(p)[-1] = {r, m, a}; - - return reinterpret_cast(p); - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_malloc(std::size_t n) - { - constexpr auto a = alignof(std::max_align_t); - - return __hipstdpar_aligned_alloc(a, n); - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_calloc(std::size_t n, std::size_t sz) - { - return std::memset(__hipstdpar_malloc(n * sz), 0, n * sz); - } - - extern "C" - inline - __attribute__((used)) - int __hipstdpar_posix_aligned_alloc( - void** p, std::size_t a, std::size_t n) - { // TODO: check invariants on alignment - if (!p || n == 0) return 0; - - *p = __hipstdpar_aligned_alloc(a, n); - - return 1; - } - - extern "C" __attribute__((weak)) void __hipstdpar_hidden_free(void*); - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_realloc(void* p, std::size_t n) - { - auto q = std::memcpy(__hipstdpar_malloc(n), p, n); - - auto h = static_cast(p) - 1; - - hipPointerAttribute_t tmp{}; - auto r = hipPointerGetAttributes(&tmp, h); - - if (!tmp.isManaged) __hipstdpar_hidden_free(p); - else hipstd::heap.deallocate(h->alloc_ptr, h->size, h->align); - - return q; - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_realloc_array(void* p, std::size_t n, std::size_t sz) - { // TODO: handle overflow in n * sz gracefully, as per spec. 
- return __hipstdpar_realloc(p, n * sz); - } - - extern "C" - inline - __attribute__((used)) - void __hipstdpar_free(void* p) - { - auto h = static_cast(p) - 1; - - hipPointerAttribute_t tmp{}; - auto r = hipPointerGetAttributes(&tmp, h); - - if (!tmp.isManaged) return __hipstdpar_hidden_free(p); - - return hipstd::heap.deallocate(h->alloc_ptr, h->size, h->align); - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_operator_new_aligned(std::size_t n, std::size_t a) - { - if (auto p = __hipstdpar_aligned_alloc(a, n)) return p; - - throw std::runtime_error{"Failed __hipstdpar_operator_new_aligned"}; - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_operator_new(std::size_t n) - { // TODO: consider adding the special handling for operator new - return - __hipstdpar_operator_new_aligned(n, alignof(std::max_align_t)); - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_operator_new_nothrow( - std::size_t n, std::nothrow_t) noexcept - { - try { - return __hipstdpar_operator_new(n); - } - catch (...) { - // TODO: handle the potential exception - } - } - - extern "C" - inline - __attribute__((used)) - void* __hipstdpar_operator_new_aligned_nothrow( - std::size_t n, std::size_t a, std::nothrow_t) noexcept - { // TODO: consider adding the special handling for operator new - try { - return __hipstdpar_operator_new_aligned(n, a); - } - catch (...) { - // TODO: handle the potential exception. 
- } - } - - extern "C" - inline - __attribute__((used)) - void __hipstdpar_operator_delete_aligned_sized( - void* p, std::size_t n, std::size_t a) noexcept - { - hipPointerAttribute_t tmp{}; - auto r = hipPointerGetAttributes(&tmp, p); - - if (!tmp.isManaged) return __hipstdpar_hidden_free(p); - - return hipstd::heap.deallocate(p, n, a); - } - - extern "C" - inline - __attribute__((used)) - void __hipstdpar_operator_delete(void* p) noexcept - { - return __hipstdpar_free(p); - } - - extern "C" - inline - __attribute__((used)) - void __hipstdpar_operator_delete_aligned(void* p, std::size_t) noexcept - { // TODO: use alignment - return __hipstdpar_free(p); - } - - extern "C" - inline - __attribute__((used)) - void __hipstdpar_operator_delete_sized(void* p, std::size_t n) noexcept - { - return __hipstdpar_operator_delete_aligned_sized( - p, n, alignof(std::max_align_t)); - } - #endif - - namespace hipstd - { - template - inline - constexpr - bool is_offloadable_callable() noexcept - { - return std::conjunction_v< - std::negation>..., - std::negation>...>; - } - - template - struct Is_offloadable_iterator : std::false_type {}; - template - struct Is_offloadable_iterator< - I, - std::void_t< - decltype(std::declval() < std::declval()), - decltype(std::declval() += std::declval()), - decltype(std::declval() + std::declval()), - decltype(std::declval()[std::declval()]), - decltype(*std::declval())>> : std::true_type - {}; - - template - inline - constexpr - bool is_offloadable_iterator() noexcept - { - #if defined(__cpp_lib_concepts) - return (... 
&& std::random_access_iterator); - #else - return std::conjunction_v...>; - #endif - } - - template - inline - constexpr - __attribute__((diagnose_if( - true, - "HIP Standard Parallelism does not support passing pointers to " - "function as callable arguments, execution will not be " - "offloaded.", - "warning"))) - void unsupported_callable_type() noexcept - {} - - template - inline - constexpr - __attribute__((diagnose_if( - true, - "HIP Standard Parallelism requires random access iterators, " - "execution will not be offloaded.", - "warning"))) - void unsupported_iterator_category() noexcept - {} - } - - namespace std - { - // BEGIN ADJACENT_DIFFERENCE - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O adjacent_difference( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::adjacent_difference(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O adjacent_difference( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return - ::std::adjacent_difference(::std::execution::par, fi, li, fo); - } - - - template< - typename I, - typename O, - typename Op, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O adjacent_difference( - execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) - { - return ::thrust::adjacent_difference( - ::thrust::device, fi, li, fo, ::std::move(op)); - } - - template< - typename I, - typename O, - typename Op, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O adjacent_difference( - execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) - { - if constexpr 
(!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::adjacent_difference( - ::std::execution::par, fi, li, fo, ::std::move(op)); - } - // END ADJACENT_DIFFERENCE - - // BEGIN ADJACENT_FIND - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I adjacent_find(execution::parallel_unsequenced_policy, I f, I l) - { - if (f == l) return l; - - const auto r = ::thrust::mismatch( - ::thrust::device, f + 1, l, f, not_equal_to<>{}); - - return (r.first == l) ? l : r.second; - } - - template< - typename I, - typename P, - enable_if_t()>* = nullptr> - inline - I adjacent_find(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::adjacent_find(::std::execution::par, f, l); - } - - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I adjacent_find(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if (f == l) return l; - - const auto r = ::thrust::mismatch( - ::thrust::device, f + 1, l, f, not_fn(::std::move(p))); - - return (r.first == l) ? l : r.second; - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I adjacent_find(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::adjacent_find( - ::std::execution::par, f, l, ::std::move(p)); - } - // END ADJACENT_FIND - - // BEGIN ALL_OF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool all_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::all_of(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool all_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::all_of(::std::execution::par, f, l, ::std::move(p)); - } - // END ALL_OF - - // BEGIN ANY_OF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool any_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::any_of(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool any_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::any_of(::std::execution::par, f, l, ::std::move(p)); - } - // END ANY_OF - - // BEGIN COPY - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::copy(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::copy(::std::execution::par, fi, li, fo); - } - // END COPY - - // BEGIN COPY_IF - template< - typename I, - typename O, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - O copy_if(execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) - { - return - ::thrust::copy_if(::thrust::device, fi, li, fo, ::std::move(p)); - } - - template< - typename I, - typename O, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - O copy_if(execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::copy_if( - ::std::execution::par, fi, li, fo, ::std::move(p)); - } - // END COPY_IF - - // BEGIN COPY_N - template< - typename I, - typename N, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O copy_n(execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - return ::thrust::copy_n(::thrust::device, fi, n, fo); - } - - template< - typename I, - typename N, - typename O, - enable_if_t()>* = nullptr> - inline - O copy_n(execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::copy_n(::std::execution::par, fi, n, fo); - } - // END COPY_N - - // BEGIN COUNT - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - typename iterator_traits::difference_type count( - execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - return ::thrust::count(::thrust::device, f, l, x); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - typename iterator_traits::difference_type count( - execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::count(::std::execution::par, f, l, x); - } - // END COUNT - - // BEGIN COUNT_IF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - typename iterator_traits::difference_type count_if( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::count_if(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename O, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - typename iterator_traits::difference_type count_if( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::count_if(::std::execution::par, f, l, ::std::move(p)); - } - // END COUNT_IF - - // BEGIN DESTROY - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void destroy(execution::parallel_unsequenced_policy, I f, I l) - { - ::thrust::for_each(f, l, [](auto& x) { destroy_at(addressof(x)); }); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void destroy(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::destroy(::std::execution::par, f, l); - } - // END DESTROY - - // BEGIN DESTROY_N - template< - typename I, - typename N, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void destroy_n(execution::parallel_unsequenced_policy, I f, N n) - { - ::thrust::for_each_n(f, n, [](auto& x) { - destroy_at(addressof(x)); - }); - } - - template< - typename I, - typename N, - enable_if_t()>* = nullptr> - inline - void destroy_n(execution::parallel_unsequenced_policy, I f, N n) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::destroy_n(::std::execution::par, f, n); - } - // END DESTROY_N - - // BEGIN EQUAL - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool equal(execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - return ::thrust::equal(::thrust::device, f0, l0, f1); - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool equal(execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::equal(::std::execution::par, f0, l0, f1); - } - - template< - typename I0, - typename I1, 
- typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, R r) - { - return - ::thrust::equal(::thrust::device, f0, l0, f1, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - return - ::std::equal(::std::execution::par, f0, l0, f1, ::std::move(r)); - } - - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - if (l0 - f0 != l1 - f1) return false; - - return ::thrust::equal(::thrust::device, f0, l0, f1); - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::equal(::std::execution::par, f0, l0, f1, l1); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - if (l0 - f0 != l1 - f1) return false; - - return 
::thrust::equal( - ::thrust::device, f0, l0, f1, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool equal( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - return ::std::equal( - ::std::execution::par, f0, l0, f1, l1, ::std::move(r)); - } - // END EQUAL - - // BEGIN EXCLUSIVE_SCAN - template< - typename I, - typename O, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O exclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo, T x) - { - return ::thrust::exclusive_scan( - ::thrust::device, fi, li, fo, ::std::move(x)); - } - - template< - typename I, - typename O, - typename T, - enable_if_t()>* = nullptr> - inline - O exclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo, T x) - { - ::hipstd::unsupported_iterator_category< - typename std::iterator_traits::iterator_category, - typename std::iterator_traits::iterator_category>(); - - return ::std::exclusive_scan( - ::std::execution::par, fi, li, fo, ::std::move(x)); - } - - template< - typename I, - typename O, - typename T, - typename Op, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O exclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - T x, - Op op) - { - return ::thrust::exclusive_scan( - ::thrust::device, fi, li, fo, ::std::move(x), ::std::move(op)); - } - - template< - typename I, - typename O, - typename T, - typename Op, - enable_if_t< - 
!::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O exclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - T x, - Op op) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::exclusive_scan( - ::std::execution::par, - fi, - li, - fo, - ::std::move(x), - ::std::move(op)); - } - // END EXCLUSIVE_SCAN - - // BEGIN FILL - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void fill(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - return ::thrust::fill(::thrust::device, f, l, x); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - void fill(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::fill(::std::execution::par, f, l, x); - } - // END FILL - - // BEGIN FILL_N - template< - typename I, - typename N, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void fill_n( - execution::parallel_unsequenced_policy, I f, N n, const T& x) - { - return ::thrust::fill_n(::thrust::device, f, n, x); - } - - template< - typename I, - typename N, - typename T, - enable_if_t()>* = nullptr> - inline - void fill_n( - execution::parallel_unsequenced_policy, I f, N n, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::fill_n(::std::execution::par, f, n, x); - } - // END FILL_N - - // BEGIN FIND - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - 
inline - I find(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - return ::thrust::find(::thrust::device, f, l, x); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - I find(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::find(::std::execution::par, f, l, x); - } - // END FIND - - // BEGIN FIND_END - // TODO: UNIMPLEMENTED IN THRUST - // END FIND_END - - // BEGIN FIND_FIRST_OF - // TODO: UNIMPLEMENTED IN THRUST - // END FIND_FIRST_OF - - // BEGIN FIND_IF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I find_if(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::find_if(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I find_if(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::find_if(::std::execution::par, f, l, ::std::move(p)); - } - // END FIND_IF - - // BEGIN FIND_IF_NOT - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I find_if_not(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return - ::thrust::find_if_not(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I find_if_not(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return - ::std::find_if_not(::std::execution::par, f, l, ::std::move(p)); - } - // END FIND_IF_NOT - - // BEGIN FOR_EACH - template< - typename I, - typename F, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - void for_each(execution::parallel_unsequenced_policy, I f, I l, F fn) - { - ::thrust::for_each(::thrust::device, f, l, ::std::move(fn)); - } - - template< - typename I, - typename F, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - void for_each(execution::parallel_unsequenced_policy, I f, I l, F fn) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::for_each(::std::execution::par, f, l, ::std::move(fn)); - } - // END FOR_EACH - - // BEGIN FOR_EACH_N - template< - typename I, - typename N, - typename F, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - I for_each_n(execution::parallel_unsequenced_policy, I f, N n, F fn) - { - return - ::thrust::for_each_n(::thrust::device, f, n, ::std::move(fn)); - } - - template< - typename I, - typename N, - typename F, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - I for_each_n(execution::parallel_unsequenced_policy, I f, N n, F fn) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::for_each_n(::std::execution::par, f, n, ::std::move(fn)); - } - // END FOR_EACH_N - - // BEGIN GENERATE - 
template< - typename I, - typename G, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - void generate(execution::parallel_unsequenced_policy, I f, I l, G g) - { - return ::thrust::generate(::thrust::device, f, l, ::std::move(g)); - } - - template< - typename I, - typename G, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - void generate(execution::parallel_unsequenced_policy, I f, I l, G g) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::generate(::std::execution::par, f, l, ::std::move(g)); - } - // END GENERATE - - // BEGIN GENERATE_N - template< - typename I, - typename N, - typename G, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - void generate_n(execution::parallel_unsequenced_policy, I f, N n, G g) - { - return ::thrust::generate_n(::thrust::device, f, n, ::std::move(g)); - } - - template< - typename I, - typename N, - typename G, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - void generate_n(execution::parallel_unsequenced_policy, I f, N n, G g) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::generate_n(::std::execution::par, f, n, ::std::move(g)); - } - // END GENERATE_N - - // BEGIN INCLUDES - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool includes( - 
execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - ::thrust::discard_iterator<> cnt{0}; - - return ::thrust::set_difference( - ::thrust::device, f1, l1, f0, l0, cnt) == cnt; - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool includes( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::includes(::std::execution::par, f0, l0, f1, l1); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool includes( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - ::thrust::discard_iterator<> cnt{0}; - - return ::thrust::set_difference( - ::thrust::device, f1, l1, f0, l0, cnt, ::std::move(r)) == cnt; - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool includes( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::includes( - ::std::execution::par, f1, l1, f0, l0, ::std::move(r)); - } - // END INCLUDES - - // BEGIN INCLUSIVE_SCAN - template< - typename I, - typename O, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return 
::thrust::inclusive_scan(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - typename T, - enable_if_t()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::inclusive_scan(::std::execution::par, fi, li, fo); - } - - template< - typename I, - typename O, - typename Op, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) - { - return ::thrust::inclusive_scan( - ::thrust::device, fi, li, fo, ::std::move(op)); - } - - template< - typename I, - typename O, - typename Op, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::inclusive_scan( - ::std::execution::par, fi, li, fo, ::std::move(op)); - } - - template< - typename I, - typename O, - typename Op, - typename T, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op op, - T x) - { // TODO: this is highly inefficient due to rocThrust not exposing - // this particular interface where the user provides x. 
- if (fi == li) return fo; - - auto lo = - ::thrust::inclusive_scan(::thrust::device, fi, li, fo, op); - - return ::thrust::transform( - ::thrust::device, - fo, - lo, - fo, - [op = ::std::move(op), x = ::std::move(x)](auto&& y) { - return op(x, y); - }); - } - - template< - typename I, - typename O, - typename Op, - typename T, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op op, - T x) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::inclusive_scan( - ::std::execution::par, - fi, - li, - fo, - ::std::move(op), - ::std::move(x)); - } - // END INCLUSIVE_SCAN - - // BEGIN INPLACE_MERGE - // TODO: UNIMPLEMENTED IN THRUST - // END INPLACE_MERGE - - // BEGIN IS_HEAP - // TODO: UNIMPLEMENTED IN THRUST - // END IS_HEAP - - // BEGIN IS_HEAP_UNTIL - // TODO: UNIMPLEMENTED IN THRUST - // END IS_HEAP_UNTIL - - // BEGIN IS_PARTITIONED - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool is_partitioned( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::is_partitioned( - ::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool is_partitioned( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::is_partitioned( - ::std::execution::par, f, l, ::std::move(p)); - } - // END IS_PARTITIONED - - // BEGIN IS_SORTED - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool is_sorted(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::is_sorted(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - bool is_sorted(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::is_sorted(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool is_sorted(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return ::thrust::is_sorted(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool is_sorted(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::is_sorted(::std::execution::par, f, l, ::std::move(r)); - } - // END IS_SORTED - - // BEGIN IS_SORTED_UNTIL - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::is_sorted_until(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l) - { - 
::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::is_sorted_until(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return ::thrust::is_sorted_until( - ::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::is_sorted_until( - ::std::execution::par, f, l, ::std::move(r)); - } - // END IS_SORTED_UNTIL - - // BEGIN LEXICOGRAPHICAL_COMPARE - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - bool lexicographical_compare( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - if (f0 == l0) return f1 != l1; - if (f1 == l1) return false; - - const auto n0 = l0 - f0; - const auto n1 = l1 - f1; - const auto n = ::std::min(n0, n1); - - const auto m = ::thrust::mismatch(::thrust::device, f0, f0 + n, f1); - - if (m.first == f0 + n) return n0 < n1; - - return *m.first < *m.second; - } - - template< - typename I0, - typename I1, - enable_if_t()>* = nullptr> - inline - bool lexicographical_compare( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return 
::std::lexicographical_compare( - ::std::execution::par, f0, l0, f1, l1); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool lexicographical_compare( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - if (f0 == l0) return f1 != l1; - if (f1 == l1) return false; - - const auto n0 = l0 - f0; - const auto n1 = l1 - f1; - const auto n = ::std::min(n0, n1); - - const auto m = ::thrust::mismatch( - ::thrust::device, - f0, - f0 + n, - f1, - [=](auto&& x, auto&& y) { return !r(x, y) && !r(y, x); }); - - if (m.first == f0 + n) return n0 < n1; - - return r(*m.first, *m.second); - } - - template< - typename I0, - typename I1, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - bool lexicographical_compare( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::lexicographical_compare( - ::std::execution::par, f0, l0, f1, l1, ::std::move(r)); - } - // END LEXICOGRAPHICAL_COMPARE - - // BEGIN MAX_ELEMENT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I max_element(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::max_element(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - I max_element(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return 
::std::max_element(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - I max_element(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return - ::thrust::max_element(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - I max_element(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::max_element(::std::execution::par, f, l, ::std::move(r)); - } - // END MAX_ELEMENT - - // BEGIN MERGE - template< - typename I0, - typename I1, - typename O, - enable_if_t< - ::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O merge( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - O fo) - { - return ::thrust::merge(::thrust::device, f0, l0, f1, l1, fo); - } - - template< - typename I0, - typename I1, - typename O, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O merge( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::merge(::std::execution::par, f0, l0, f1, l1, fo); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O merge( - 
execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - O fo, - R r) - { - return ::thrust::merge( - ::thrust::device, f0, l0, f1, l1, fo, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O merge( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - O fo, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::merge( - ::std::execution::par, f0, l0, f1, l1, fo, ::std::move(r)); - } - // END MERGE - - // BEGIN MIN_ELEMENT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I min_element(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::min_element(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - I min_element(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::min_element(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - I min_element(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return - ::thrust::min_element(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - I 
min_element(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::min_element(::std::execution::par, f, l, ::std::move(r)); - } - // END MIN_ELEMENT - - // BEGIN MINMAX_ELEMENT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - pair minmax_element( - execution::parallel_unsequenced_policy, I f, I l) - { - auto [m, M] = ::thrust::minmax_element(::thrust::device, f, l); - - return {::std::move(m), ::std::move(M)}; - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - pair minmax_element( - execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::minmax_element(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - pair minmax_element( - execution::parallel_unsequenced_policy, I f, I l, R r) - { - auto [m, M] = ::thrust::minmax_element( - ::thrust::device, f, l, ::std::move(r)); - - return {::std::move(m), ::std::move(M)}; - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - pair minmax_element( - execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::minmax_element( - ::std::execution::par, f, l, 
::std::move(r)); - } - // END MINMAX_ELEMENT - - // BEGIN MISMATCH - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - auto [m0, m1] = ::thrust::mismatch(::thrust::device, f0, l0, f1); - - return {::std::move(m0), ::std::move(m1)}; - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::mismatch(::std::execution::par, f0, l0, f1); - } - - template< - typename I0, - typename I1, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, P p) - { - auto [m0, m1] = ::thrust::mismatch( - ::thrust::device, f0, l0, f1, ::std::move(p)); - - return {::std::move(m0), ::std::move(m1)}; - } - - template< - typename I0, - typename I1, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::mismatch( - ::std::execution::par, f0, l0, f1, ::std::move(p)); - } - - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - const auto n = ::std::min(l0 - f0, l1 - f1); - - auto [m0, m1] = - ::thrust::mismatch(::thrust::device, f0, f0 + n, f1); - - return {::std::move(m0), ::std::move(m1)}; - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::mismatch(::std::execution::par, f0, l0, f1, l1); - } - - template< - typename I0, - typename I1, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - P p) - { - const auto n = ::std::min(l0 - f0, l1 - f1); - - auto [m0, m1] = ::thrust::mismatch( - ::thrust::device, f0, f0 + n, f1, ::std::move(p)); - - return {::std::move(m0), ::std::move(m1)}; - } - - template< - typename I0, - typename I1, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair mismatch( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - I1 l1, - P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::mismatch( - ::std::execution::par, f0, l0, f1, l1, ::std::move(p)); - } - // END MISMATCH - - // BEGIN MOVE - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O move(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::copy( - ::thrust::device, - make_move_iterator(fi), - make_move_iterator(li), - fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O move(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::move(::std::execution::par, fi, li, fo); - } - // END MOVE - - // BEGIN NONE_OF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool none_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::none_of(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - bool none_of(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::none_of(::std::execution::par, f, l, ::std::move(p)); - } - // END NONE_OF - - // BEGIN NTH_ELEMENT - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt nth, - KeysIt last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::nth_element(std::execution::par, first, nth, last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt nth, - KeysIt last, - CompareOp compare_op) - { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, nth)); - - if(count == 0) - { - return; - } - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::nth_element( - nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::nth_element( - ptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "nth_element: failed to synchronize"); - } - - template ()>* = nullptr> - inline void - nth_element(execution::parallel_unsequenced_policy, KeysIt first, KeysIt nth, KeysIt last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - - std::nth_element(std::execution::par, first, nth, last); - } - - template ()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy policy, - KeysIt first, - KeysIt nth, - KeysIt last) - { - typedef typename thrust::iterator_value::type item_type; - std::nth_element(policy, first, nth, last, thrust::less()); - } - // END NTH_ELEMENT - - // BEGIN PARTIAL_SORT - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::partial_sort(std::execution::par, first, middle, last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last, - CompareOp compare_op) - { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, 
middle)); - - if(count == 0 || n == 0) - { - return; - } - - const size_t n_index = n - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort( - nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); - - // Allocate temporary storage. - thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort( - ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort: failed to synchronize"); - } - - template ()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - - std::partial_sort(std::execution::par, first, middle, last); - } - - template ()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy policy, - KeysIt first, - KeysIt middle, - KeysIt last) - { - typedef typename thrust::iterator_value::type item_type; - std::partial_sort(policy, first, middle, last, thrust::less()); - } - // END PARTIAL_SORT - - // BEGIN PARTIAL_SORT_COPY - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - 
hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::partial_sort_copy(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last, - CompareOp compare_op) - { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t d_count = static_cast(thrust::distance(d_first, d_last)); - - if(count == 0 || d_count == 0) - { - return; - } - - const size_t d_index = d_count - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort_copy(nullptr, - storage_size, - first, - d_first, - d_index, - count, - compare_op, - stream, - debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort_copy( - ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort_copy: failed to synchronize"); - } - - template ()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - - std::partial_sort_copy(std::execution::par, first, last, d_first, d_last); - } - - template ()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last) - { - typedef typename thrust::iterator_value::type item_type; - std::partial_sort_copy(policy, first, last, d_first, d_last, thrust::less()); - } - // END PARTIAL_SORT_COPY - - // BEGIN PARTITION - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I partition(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::partition(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I partition(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return - ::std::partition(::std::execution::par, f, l, ::std::move(p)); - } - // END PARTITION - - // BEGIN PARTITION_COPY - template< - typename I, - typename O0, - typename O1, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair partition_copy( - execution::parallel_unsequenced_policy, - I f, - I l, - O0 fo0, - O1 fo1, - P p) - { - auto [r0, r1] = ::thrust::partition_copy( - ::thrust::device, f, l, fo0, fo1, ::std::move(p)); - - return {::std::move(r0), ::std::move(r1)}; - } - - template< - typename I, - typename O0, - typename O1, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - pair partition_copy( - execution::parallel_unsequenced_policy, - I f, - I l, - O0 fo0, - O1 fo1, - P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::partition_copy( - ::std::execution::par, f, l, fo0, fo1, ::std::move(p)); - } - // END PARTITION_COPY - - // BEGIN REDUCE - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - typename iterator_traits::value_type reduce( - execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::reduce(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - typename iterator_traits::value_type reduce( - execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::reduce(::std::execution::par, f, l); - } - - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - T reduce(execution::parallel_unsequenced_policy, I f, I l, T x) - { - return ::thrust::reduce(::thrust::device, f, l, ::std::move(x)); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - T reduce(execution::parallel_unsequenced_policy, I f, I l, T x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::reduce(::std::execution::par, f, l, ::std::move(x)); - } - - template< - typename I, - typename T, - typename Op, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - T reduce(execution::parallel_unsequenced_policy, I f, I l, T x, Op op) - { - return ::thrust::reduce( - ::thrust::device, f, l, ::std::move(x), ::std::move(op)); - } - - template< - typename I, - typename T, - typename Op, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - T reduce(execution::parallel_unsequenced_policy, I f, I l, T x, Op op) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - 
typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::reduce( - ::std::execution::par, f, l, ::std::move(x), ::std::move(op)); - } - // END REDUCE - - // BEGIN REMOVE - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I remove(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - return ::thrust::remove(::thrust::device, f, l, x); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - I remove(execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::remove(::std::execution::par, f, l, x); - } - // END REMOVE - - // BEGIN REMOVE_COPY - template< - typename I, - typename O, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O remove_copy( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - const T& x) - { - return ::thrust::remove_copy(::thrust::device, fi, li, fo, x); - } - - template< - typename I, - typename O, - typename T, - enable_if_t()>* = nullptr> - inline - O remove_copy( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::remove_copy(::std::execution::par, fi, li, fo, x); - } - // END REMOVE_COPY - - // BEGIN REMOVE_COPY_IF - template< - typename I, - typename O, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - O remove_copy_if( - execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) - { - return ::thrust::remove_copy_if( - ::thrust::device, fi, li, fo, ::std::move(p)); - } - - template< - typename I, - typename O, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - O remove_copy_if( - execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::remove_copy_if( - ::std::execution::par, fi, li, fo, ::std::move(p)); - } - // END REMOVE_COPY_IF - - // BEGIN REMOVE_IF - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I remove_if(execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::remove_if(::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I remove_if(execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return - ::std::remove_if(::std::execution::par, f, l, ::std::move(p)); - } - // END REMOVE_IF - - // BEGIN REPLACE - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void replace( - execution::parallel_unsequenced_policy, - I f, - I l, - const T& x, - const T& y) - { - return ::thrust::replace(::thrust::device, f, l, x, y); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - void replace( - execution::parallel_unsequenced_policy, - I f, - I l, - const T& x, - const T& y) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::replace(::std::execution::par, f, l, x, y); - } - // END REPLACE - - // BEGIN REPLACE_COPY - template< - typename I, - typename O, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void replace_copy( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - const T& x, - const T& y) - { - return ::thrust::replace_copy(::thrust::device, fi, li, fo, x, y); - } - - template< - typename I, - typename O, - typename T, - enable_if_t()>* = nullptr> - inline - void replace_copy( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - const T& x, - const T& y) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::replace_copy(::std::execution::par, fi, li, fo, x, y); - } - // END REPLACE_COPY - - // BEGIN REPLACE_COPY_IF - template< - typename I, - typename O, - typename P, - typename T, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - void replace_copy_if( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - P p, - const T& x) - { - return ::thrust::replace_copy_if( - ::thrust::device, fi, li, fo, ::std::move(p), x); - } - - template< - typename I, - typename O, - typename P, - typename T, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - void replace_copy_if( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - P p, - const T& x) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::replace_copy_if( - ::std::execution::par, fi, li, fo, ::std::move(p), x); - } - // END REPLACE_COPY_IF - - // BEGIN REPLACE_IF - template< - typename I, - typename P, - typename T, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - void replace_if( - execution::parallel_unsequenced_policy, I f, I l, P p, const T& x) - { - return - ::thrust::replace_if(::thrust::device, f, l, ::std::move(p), x); - } - - template< - typename I, - typename P, - typename T, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - void replace_if( - execution::parallel_unsequenced_policy, I f, I l, P p, const T& x) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::replace_if( - ::std::execution::par, f, l, ::std::move(p), x); - } - // END REPLACE_IF - - // BEGIN REVERSE - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void reverse(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::reverse(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void reverse(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::reverse(::std::execution::par, f, l); - } - // END REVERSE - - // BEGIN REVERSE_COPY - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void reverse_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::reverse_copy(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - void reverse_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::reverse_copy(::std::execution::par, fi, li, fo); - } - // END REVERSE_COPY - - // BEGIN ROTATE - // TODO: UNIMPLEMENTED IN THRUST - // END ROTATE - - // BEGIN ROTATE_COPY - // TODO: UNIMPLEMENTED IN THRUST - // END ROTATE_COPY - - // BEGIN SEARCH - // TODO: UNIMPLEMENTED IN THRUST - // END SEARCH - - // BEGIN SEARCH_N - // TODO: UNIMPLEMENTED IN THRUST - // END SEARCH_N - - // BEGIN SET_DIFFERENCE - template< - typename I0, - typename I1, - typename O, - enable_if_t< - ::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O set_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - return ::thrust::set_difference( - ::thrust::device, fi0, li0, fi1, li1, 
fo); - } - - template< - typename I0, - typename I1, - typename O, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O set_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::set_difference( - ::std::execution::par, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - return ::thrust::set_difference( - ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::set_difference( - ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - // END SET_DIFFERENCE - - // BEGIN SET_INTERSECTION - template< - typename I0, - typename I1, - typename O, - enable_if_t< - ::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O set_intersection( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O 
fo) - { - return ::thrust::set_intersection( - ::thrust::device, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O set_intersection( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::set_intersection( - ::std::execution::par, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_intersection( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - return ::thrust::set_intersection( - ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_intersection( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::set_intersection( - ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - // END SET_INTERSECTION - - // BEGIN SET_SYMMETRIC_DIFFERENCE - template< - typename I0, - typename I1, - typename O, - enable_if_t< - ::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O 
set_symmetric_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - return ::thrust::set_symmetric_difference( - ::thrust::device, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O set_symmetric_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::set_symmetric_difference( - ::std::execution::par, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_symmetric_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - return ::thrust::set_symmetric_difference( - ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_symmetric_difference( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::set_symmetric_difference( - ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - // END 
SET_SYMMETRIC_DIFFERENCE - - // BEGIN SET_UNION - template< - typename I0, - typename I1, - typename O, - enable_if_t< - ::hipstd::is_offloadable_iterator>* = nullptr> - inline - O set_union( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - return - ::thrust::set_union(::thrust::device, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - enable_if_t< - !::hipstd::is_offloadable_iterator>* = nullptr> - inline - O set_union( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return - ::std::set_union(::std::execution::par, fi0, li0, fi1, li1, fo); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_union( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - return ::thrust::set_union( - ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); - } - - template< - typename I0, - typename I1, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O set_union( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - I1 li1, - O fo, - R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::set_union( - ::std::execution::par, fi0, li0, fi1, 
li1, fo, ::std::move(r)); - } - // END SET_UNION - - // BEGIN SHIFT_LEFT - // TODO: UNIMPLEMENTED IN THRUST - // END SHIFT_LEFT - - // BEGIN SHIFT_RIGHT - // TODO: UNIMPLEMENTED IN THRUST - // END SHIFT_RIGHT - - // BEGIN SORT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void sort(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::sort(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void sort(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::sort(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - void sort(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return ::thrust::sort(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - void sort(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::sort(::std::execution::par, f, l, ::std::move(r)); - } - // END SORT - - // BEGIN STABLE_PARTITION - template< - typename I, - typename P, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I stable_partition( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - return ::thrust::stable_partition( - ::thrust::device, f, l, ::std::move(p)); - } - - template< - typename I, - typename P, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable

()>* = nullptr> - inline - I stable_partition( - execution::parallel_unsequenced_policy, I f, I l, P p) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable

()) { - ::hipstd::unsupported_callable_type

(); - } - - return ::std::stable_partition( - ::std::execution::par, f, l, ::std::move(p)); - } - // END STABLE_PARTITION - - // BEGIN STABLE_SORT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void stable_sort(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::stable_sort(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void stable_sort(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::stable_sort(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - void stable_sort(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return - ::thrust::stable_sort(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - void stable_sort(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return - ::std::stable_sort(::std::execution::par, f, l, ::std::move(r)); - } - // END STABLE_SORT - - // BEGIN SWAP_RANGES - template< - typename I0, - typename I1, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I1 swap_ranges( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - return ::thrust::swap_ranges(::thrust::device, f0, l0, f1); - } - - template< - typename I0, - typename I1, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I1 
swap_ranges( - execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::swap_ranges(::std::execution::par, f0, l0, f1); - } - // END SWAP_RANGES - - // BEGIN TRANSFORM - template< - typename I, - typename O, - typename F, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform( - execution::parallel_unsequenced_policy, I fi, I li, O fo, F fn) - { - return ::thrust::transform( - ::thrust::device, fi, li, fo, ::std::move(fn)); - } - - template< - typename I, - typename O, - typename F, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform( - execution::parallel_unsequenced_policy, I fi, I li, O fo, F fn) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform( - ::std::execution::par, fi, li, fo, ::std::move(fn)); - } - - template< - typename I0, - typename I1, - typename O, - typename F, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - O fo, - F fn) - { - return ::thrust::transform( - ::thrust::device, fi0, li0, fi1, fo, ::std::move(fn)); - } - - template< - typename I0, - typename I1, - typename O, - typename F, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform( - execution::parallel_unsequenced_policy, - I0 fi0, - I0 li0, - I1 fi1, - O fo, - F fn) 
- { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform( - ::std::execution::par, fi0, li0, fi1, fo, ::std::move(fn)); - } - // END TRANSFORM - - // BEGIN TRANSFORM_EXCLUSIVE_SCAN - template< - typename I, - typename O, - typename T, - typename Op0, - typename Op1, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_exclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - T x, - Op0 op0, - Op1 op1) - { - return ::thrust::transform_exclusive_scan( - ::thrust::device, - fi, - li, - fo, - ::std::move(op1), - ::std::move(x), - ::std::move(op0)); - } - - template< - typename I, - typename O, - typename T, - typename Op0, - typename Op1, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_exclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - T x, - Op0 op0, - Op1 op1) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform_exclusive_scan( - ::std::execution::par, - fi, - li, - fo, - ::std::move(x), - ::std::move(op0), - ::std::move(op1)); - } - // END TRANSFORM_EXCLUSIVE_SCAN - - // BEGIN TRANSFORM_INCLUSIVE_SCAN - template< - typename I, - typename O, - typename Op0, - typename Op1, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - 
::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op0 op0, - Op1 op1) - { - return ::thrust::transform_inclusive_scan( - ::thrust::device, - fi, - li, - fo, - ::std::move(op1), - ::std::move(op0)); - } - - template< - typename I, - typename O, - typename Op0, - typename Op1, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op0 op0, - Op1 op1) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform_inclusive_scan( - ::std::execution::par, - fi, - li, - fo, - ::std::move(op0), - ::std::move(op1)); - } - - template< - typename I, - typename O, - typename Op0, - typename Op1, - typename T, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op0 op0, - Op1 op1, - T x) - { // TODO: this is inefficient. 
- if (fi == li) return fo; - - auto lo = ::thrust::transform_inclusive_scan( - ::thrust::device, - fi, - li, - fo, - ::std::move(op1), - op0); - - return ::thrust::transform( - ::thrust::device, - fo, - lo, - fo, - [op0 = ::std::move(op0), x = ::std::move(x)](auto&& y) { - return op0(x, y); - }); - } - - template< - typename I, - typename O, - typename Op0, - typename Op1, - typename T, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O transform_inclusive_scan( - execution::parallel_unsequenced_policy, - I fi, - I li, - O fo, - Op0 op0, - Op1 op1, - T x) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform_inclusive_scan( - ::std::execution::par, - fi, - li, - fo, - ::std::move(op0), - ::std::move(op1), - ::std::move(x)); - } - // END TRANSFORM_INCLUSIVE_SCAN - - // BEGIN TRANSFORM_REDUCE - template< - typename I0, - typename I1, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - T x) - { - return ::thrust::inner_product( - ::thrust::device, f0, l0, f1, ::std::move(x)); - } - - template< - typename I0, - typename I1, - typename T, - enable_if_t< - !::hipstd::is_offloadable_iterator()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - T x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::transform_reduce( - ::std::execution::par, f0, l0, f1, ::std::move(x)); - } - - template< - typename I0, - typename I1, - typename T, - 
typename Op0, - typename Op1, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - T x, - Op0 op0, - Op1 op1) - { - return ::thrust::inner_product( - ::thrust::device, - f0, - l0, - f1, - ::std::move(x), - ::std::move(op0), - ::std::move(op1)); - } - - template< - typename I0, - typename I1, - typename T, - typename Op0, - typename Op1, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I0 f0, - I0 l0, - I1 f1, - T x, - Op0 op0, - Op1 op1) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform_reduce( - ::std::execution::par, - f0, - l0, - f1, - ::std::move(x), - ::std::move(op0), - ::std::move(op1)); - } - - template< - typename I, - typename T, - typename Op0, - typename Op1, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I f, - I l, - T x, - Op0 op0, - Op1 op1) - { - return ::thrust::transform_reduce( - ::thrust::device, - f, - l, - ::std::move(op1), - ::std::move(x), - ::std::move(op0)); - } - - template< - typename I, - typename T, - typename Op0, - typename Op1, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - T transform_reduce( - execution::parallel_unsequenced_policy, - I f, - I l, - T x, - Op0 op0, - Op1 op1) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - 
::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::transform_reduce( - ::std::execution::par, - f, - l, - ::std::move(x), - ::std::move(op0), - ::std::move(op1)); - } - // END TRANSFORM_REDUCE - - // BEGIN UNINITIALIZED_COPY - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O uninitialized_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::uninitialized_copy(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O uninitialized_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_copy(::std::execution::par, fi, li, fo); - } - // END UNINITIALIZED_COPY - - // BEGIN UNINITIALIZED_COPY_N - template< - typename I, - typename N, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O uninitialized_copy_n( - execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - return ::thrust::uninitialized_copy_n(::thrust::device, fi, n, fo); - } - - template< - typename I, - typename N, - typename O, - enable_if_t()>* = nullptr> - inline - O uninitialized_copy_n( - execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return - ::std::uninitialized_copy_n(::std::execution::par, fi, n, fo); - } - // END UNINITIALIZED_COPY_N - - // BEGIN UNINITIALIZED_DEFAULT_CONSTRUCT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void 
uninitialized_default_construct( - execution::parallel_unsequenced_policy, I f, I l) - { - ::thrust::for_each(::thrust::device, f, l, [](auto& x) { - auto p = const_cast( - static_cast((addressof(x)))); - ::new (p) typename iterator_traits::value_type; - }); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void uninitialized_default_construct( - execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_default_construct( - ::std::execution::par, f, l); - } - // END UNINITIALIZED_DEFAULT_CONSTRUCT - - // BEGIN UNINITIALIZED_DEFAULT_CONSTRUCT_N - template< - typename I, - typename N, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void uninitialized_default_construct_n( - execution::parallel_unsequenced_policy, I f, N n) - { - ::thrust::for_each_n(::thrust::device, f, n, [](auto& x) { - auto p = const_cast( - static_cast((addressof(x)))); - ::new (p) typename iterator_traits::value_type; - }); - } - - template< - typename I, - typename N, - enable_if_t()>* = nullptr> - inline - void uninitialized_default_construct_n( - execution::parallel_unsequenced_policy, I f, N n) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_default_construct_n( - ::std::execution::par, f, n); - } - // END UNINITIALIZED_DEFAULT_CONSTRUCT_N - - // BEGIN UNINITIALIZED_FILL - template< - typename I, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void uninitialized_fill( - execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - return ::thrust::uninitialized_fill(::thrust::device, f, l, x); - } - - template< - typename I, - typename T, - enable_if_t()>* = nullptr> - inline - void uninitialized_fill( - execution::parallel_unsequenced_policy, I f, I l, const T& x) - { - 
::hipstd::unsupported_iterator_category< - typename iterator_traits::offload_category>(); - - return ::std::uninitialized_fill(::std::execution::par, f, l, x); - } - // END UNINITIALIZED_FILL - - // BEGIN UNINITIALIZED_FILL_N - template< - typename I, - typename N, - typename T, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void uninitialized_fill( - execution::parallel_unsequenced_policy, I f, N n, const T& x) - { - return ::thrust::uninitialized_fill_n(::thrust::device, f, n, x); - } - - template< - typename I, - typename N, - typename T, - enable_if_t()>* = nullptr> - inline - void uninitialized_fill( - execution::parallel_unsequenced_policy, I f, N n, const T& x) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_fill_n(::std::execution::par, f, n, x); - } - // END UNINITIALIZED_FILL_N - - // BEGIN UNINITIALIZED_MOVE - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O uninitialized_move( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::uninitialized_copy( - ::thrust::device, - make_move_iterator(fi), - make_move_iterator(li), - fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O uninitialized_move( - execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_move(::std::execution::par, fi, li, fo); - } - // END UNINITIALIZED_MOVE - - // BEGIN UNINITIALIZED_MOVE_N - template< - typename I, - typename N, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O uninitialized_move_n( - execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - return ::thrust::uninitialized_copy_n( - ::thrust::device, 
make_move_iterator(fi), n, fo); - } - - template< - typename I, - typename N, - typename O, - enable_if_t()>* = nullptr> - inline - O uninitialized_move_n( - execution::parallel_unsequenced_policy, I fi, N n, O fo) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return - ::std::uninitialized_move_n(::std::execution::par, fi, n, fo); - } - // END UNINITIALIZED_MOVE_N - - // BEGIN UNINITIALIZED_VALUE_CONSTRUCT - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void uninitialized_value_construct( - execution::parallel_unsequenced_policy, I f, I l) - { - ::thrust::for_each(::thrust::device, f, l, [](auto& x) { - auto p = const_cast( - static_cast((addressof(x)))); - ::new (p) typename iterator_traits::value_type{}; - }); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - void uninitialized_value_construct( - execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::uninitialized_value_construct( - ::std::execution::par, f, l); - } - // END UNINITIALIZED_VALUE_CONSTRUCT - - // BEGIN UNINITIALIZED_VALUE_CONSTRUCT_N - template< - typename I, - typename N, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - void uninitialized_value_construct_n( - execution::parallel_unsequenced_policy, I f, N n) - { - ::thrust::for_each_n(::thrust::device, f, n, [](auto& x) { - auto p = const_cast( - static_cast((addressof(x)))); - ::new (p) typename iterator_traits::value_type{}; - }); - } - - template< - typename I, - typename N, - enable_if_t()>* = nullptr> - inline - void uninitialized_value_construct_n( - execution::parallel_unsequenced_policy, I f, N n) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return 
::std::uninitialized_value_construct_n( - ::std::execution::par, f, n); - } - // END UNINITIALIZED_VALUE_CONSTRUCT_N - - // BEGIN UNIQUE - template< - typename I, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - I unique(execution::parallel_unsequenced_policy, I f, I l) - { - return ::thrust::unique(::thrust::device, f, l); - } - - template< - typename I, - enable_if_t()>* = nullptr> - inline - I unique(execution::parallel_unsequenced_policy, I f, I l) - { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - - return ::std::unique(::std::execution::par, f, l); - } - - template< - typename I, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - I unique(execution::parallel_unsequenced_policy, I f, I l, R r) - { - return ::thrust::unique(::thrust::device, f, l, ::std::move(r)); - } - - template< - typename I, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - I unique(execution::parallel_unsequenced_policy, I f, I l, R r) - { - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::unique(::std::execution::par, f, l, ::std::move(r)); - } - // END UNIQUE - - // BEGIN UNIQUE_COPY - template< - typename I, - typename O, - enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> - inline - O unique_copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - return ::thrust::unique_copy(::thrust::device, fi, li, fo); - } - - template< - typename I, - typename O, - enable_if_t()>* = nullptr> - inline - O unique_copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) - { - ::hipstd::unsupported_iterator_category< - 
typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - - return ::std::unique_copy(::std::execution::par, fi, li, fo); - } - - template< - typename I, - typename O, - typename R, - enable_if_t< - ::hipstd::is_offloadable_iterator() && - ::hipstd::is_offloadable_callable()>* = nullptr> - inline - O unique_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo, R r) - { - return ::thrust::unique_copy( - ::thrust::device, fi, li, fo, ::std::move(r)); - } - - template< - typename I, - typename O, - typename R, - enable_if_t< - !::hipstd::is_offloadable_iterator() || - !::hipstd::is_offloadable_callable()>* = nullptr> - inline - O unique_copy( - execution::parallel_unsequenced_policy, I fi, I li, O fo, R r) - { - - if constexpr (!::hipstd::is_offloadable_iterator()) { - ::hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr (!::hipstd::is_offloadable_callable()) { - ::hipstd::unsupported_callable_type(); - } - - return ::std::unique_copy( - ::std::execution::par, fi, li, fo, ::std::move(r)); - } - // END UNIQUE_COPY - - // BEGIN NTH_ELEMENT - - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt nth, - KeysIt last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::nth_element(std::execution::par, first, nth, last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt nth, - KeysIt last, - CompareOp compare_op) - { - const size_t count = 
static_cast(thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, nth)); - - if(count == 0) - { - return; - } - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::nth_element( - nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); - // Allocate temporary storage. - thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::nth_element( - ptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "nth_element: failed to synchronize"); - } - - template ()>* = nullptr> - inline void - nth_element(execution::parallel_unsequenced_policy, KeysIt first, KeysIt nth, KeysIt last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - - std::nth_element(std::execution::par, first, nth, last); - } - - template ()>* = nullptr> - inline void nth_element(execution::parallel_unsequenced_policy policy, - KeysIt first, - KeysIt nth, - KeysIt last) - { - typedef typename thrust::iterator_value::type item_type; - std::nth_element(policy, first, nth, last, thrust::less()); - } - - // END NTH_ELEMENT - - // BEGIN PARTIAL_SORT - - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename 
iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::partial_sort(std::execution::par, first, middle, last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last, - CompareOp compare_op) - { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, middle)); - - if(count == 0 || n == 0) - { - return; - } - - const size_t n_index = n - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort( - nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort( - ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort: failed to synchronize"); - } - - template ()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy, - KeysIt first, - KeysIt middle, - KeysIt last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category>(); - } - - std::partial_sort(std::execution::par, first, middle, last); - } - - template ()>* = nullptr> - inline void partial_sort(execution::parallel_unsequenced_policy policy, - KeysIt first, - KeysIt middle, - KeysIt last) - { - typedef typename thrust::iterator_value::type item_type; - std::partial_sort(policy, first, middle, last, thrust::less()); - } - - // END PARTIAL_SORT - - // BEGIN PARTIAL_SORT_COPY - - template () - || !hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last, - CompareOp compare_op) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - if constexpr(!hipstd::is_offloadable_callable()) - { - hipstd::unsupported_callable_type(); - } - - std::partial_sort(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); - } - - template () - && hipstd::is_offloadable_callable()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt 
d_last, - CompareOp compare_op) - { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t d_count = static_cast(thrust::distance(d_first, d_last)); - - if(count == 0 || d_count == 0) - { - return; - } - - const size_t d_index = d_count - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort_copy(nullptr, - storage_size, - first, - d_first, - d_index, - count, - compare_op, - stream, - debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); - - // Allocate temporary storage. - thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort_copy( - ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort_copy: failed to synchronize"); - } - - template ()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last) - { - if constexpr(!hipstd::is_offloadable_iterator()) - { - hipstd::unsupported_iterator_category< - typename iterator_traits::iterator_category, - typename iterator_traits::iterator_category>(); - } - - std::partial_sort_copy(std::execution::par, first, last, d_first, d_last); - } - - template ()>* = nullptr> - inline void partial_sort_copy(execution::parallel_unsequenced_policy policy, - ForwardIt first, - ForwardIt last, - RandomIt d_first, - RandomIt d_last) - { - typedef typename thrust::iterator_value::type item_type; - std::partial_sort_copy(policy, first, last, d_first, d_last, thrust::less()); - } - - // END 
PARTIAL_SORT_COPY - } + // Interposed allocations + #include "impl/interpose_allocations.hpp" + + // Parallel STL algorithms + #include "impl/batch.hpp" + #include "impl/copy.hpp" + #include "impl/generation.hpp" + #include "impl/heap.hpp" + #include "impl/lexicographical_comparison.hpp" + #include "impl/merge.hpp" + #include "impl/min_max.hpp" + #include "impl/numeric.hpp" + #include "impl/order_changing.hpp" + #include "impl/partitioning.hpp" + #include "impl/removing.hpp" + #include "impl/search.hpp" + #include "impl/set.hpp" + #include "impl/sorting.hpp" + #include "impl/swap.hpp" + #include "impl/transformation.hpp" + #include "impl/uninitialized.hpp" #endif diff --git a/thrust/system/hip/hipstdpar/impl/batch.hpp b/thrust/system/hip/hipstdpar/impl/batch.hpp new file mode 100644 index 000000000..103176bdd --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/batch.hpp @@ -0,0 +1,107 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/impl/batch.hpp + * \brief Batch operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN FOR_EACH + template< + typename I, + typename F, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + void for_each(execution::parallel_unsequenced_policy, I f, I l, F fn) + { + ::thrust::for_each(::thrust::device, f, l, ::std::move(fn)); + } + + template< + typename I, + typename F, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + void for_each(execution::parallel_unsequenced_policy, I f, I l, F fn) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::for_each(::std::execution::par, f, l, ::std::move(fn)); + } + // END FOR_EACH + + // BEGIN FOR_EACH_N + template< + typename I, + typename N, + typename F, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + I for_each_n(execution::parallel_unsequenced_policy, I f, N n, F fn) + { + return + ::thrust::for_each_n(::thrust::device, f, n, ::std::move(fn)); + } + + template< + typename I, + typename N, + typename F, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + I for_each_n(execution::parallel_unsequenced_policy, I f, N n, F fn) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::for_each_n(::std::execution::par, f, n, ::std::move(fn)); + } + // END 
FOR_EACH_N +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/copy.hpp b/thrust/system/hip/hipstdpar/impl/copy.hpp new file mode 100644 index 000000000..d69df37ee --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/copy.hpp @@ -0,0 +1,157 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/copy.hpp + * \brief Copy operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN COPY + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::copy(::thrust::device, fi, li, fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::copy(::std::execution::par, fi, li, fo); + } + // END COPY + + // BEGIN COPY_IF + template< + typename I, + typename O, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + O copy_if(execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) + { + return + ::thrust::copy_if(::thrust::device, fi, li, fo, ::std::move(p)); + } + + template< + typename I, + typename O, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + O copy_if(execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::copy_if( + ::std::execution::par, fi, li, fo, ::std::move(p)); + } + // END COPY_IF + + // BEGIN COPY_N + template< + typename I, + typename N, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O copy_n(execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + return ::thrust::copy_n(::thrust::device, fi, n, fo); + } + + template< + typename I, + typename N, + typename O, + enable_if_t()>* = nullptr> + inline + O copy_n(execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::copy_n(::std::execution::par, fi, n, fo); + } + // END COPY_N + + // BEGIN MOVE + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O move(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::copy( + ::thrust::device, + make_move_iterator(fi), + make_move_iterator(li), + fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O move(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::move(::std::execution::par, fi, li, fo); + } + // END MOVE +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/generation.hpp b/thrust/system/hip/hipstdpar/impl/generation.hpp new file mode 100644 index 000000000..f87a01a88 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/generation.hpp @@ -0,0 +1,161 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/generation.hpp + * \brief Generation operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN FILL + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void fill(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + return ::thrust::fill(::thrust::device, f, l, x); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + void fill(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::fill(::std::execution::par, f, l, x); + } + // END FILL + + // BEGIN FILL_N + template< + typename I, + typename N, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void fill_n( + execution::parallel_unsequenced_policy, I f, N n, const T& x) + { + return ::thrust::fill_n(::thrust::device, f, n, x); + } + + template< + typename I, + typename N, + typename T, + enable_if_t()>* = nullptr> + inline + void fill_n( + execution::parallel_unsequenced_policy, I f, N n, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::fill_n(::std::execution::par, f, n, x); + } + // END FILL_N + + // BEGIN GENERATE + template< + typename 
I, + typename G, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + void generate(execution::parallel_unsequenced_policy, I f, I l, G g) + { + return ::thrust::generate(::thrust::device, f, l, ::std::move(g)); + } + + template< + typename I, + typename G, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + void generate(execution::parallel_unsequenced_policy, I f, I l, G g) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::generate(::std::execution::par, f, l, ::std::move(g)); + } + // END GENERATE + + // BEGIN GENERATE_N + template< + typename I, + typename N, + typename G, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + void generate_n(execution::parallel_unsequenced_policy, I f, N n, G g) + { + return ::thrust::generate_n(::thrust::device, f, n, ::std::move(g)); + } + + template< + typename I, + typename N, + typename G, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + void generate_n(execution::parallel_unsequenced_policy, I f, N n, G g) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::generate_n(::std::execution::par, f, n, ::std::move(g)); + } + // END GENERATE_N +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/heap.hpp b/thrust/system/hip/hipstdpar/impl/heap.hpp new file mode 100644 index 000000000..4e3dc81af --- 
/dev/null +++ b/thrust/system/hip/hipstdpar/impl/heap.hpp @@ -0,0 +1,39 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/heap.hpp + * \brief Heap operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +// rocThrust includes + +// STL includes + +namespace std +{ + // BEGIN IS_HEAP + // TODO: UNIMPLEMENTED IN THRUST + // END IS_HEAP + + // BEGIN IS_HEAP_UNTIL + // TODO: UNIMPLEMENTED IN THRUST + // END IS_HEAP_UNTIL +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/hipstd.hpp b/thrust/system/hip/hipstdpar/impl/hipstd.hpp new file mode 100644 index 000000000..079704a1f --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/hipstd.hpp @@ -0,0 +1,89 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file thrust/system/hip/hipstdpar/include/hipstd.hpp + * \brief hipstd utilities implementation detail header for HIPSTDPAR. + */ + +#ifndef THRUST_SYSTEM_HIP_HIPSTDPAR_HIPSTD_HPP +#define THRUST_SYSTEM_HIP_HIPSTDPAR_HIPSTD_HPP + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include +#include +#include +#include + +namespace hipstd +{ +template +inline constexpr bool is_offloadable_callable() noexcept +{ + return std::conjunction_v>..., + std::negation>...>; +} + +template +struct Is_offloadable_iterator : std::false_type +{ +}; +template +struct Is_offloadable_iterator< + I, + std::void_t() < std::declval()), + decltype(std::declval() += std::declval()), + decltype(std::declval() + std::declval()), + decltype(std::declval()[std::declval()]), + decltype(*std::declval())>> : std::true_type +{ +}; + +template +inline constexpr bool is_offloadable_iterator() noexcept +{ +#if defined(__cpp_lib_concepts) + return (... && std::random_access_iterator); +#else + return std::conjunction_v...>; +#endif +} + +template +inline constexpr + __attribute__((diagnose_if(true, + "HIP Standard Parallelism does not support passing pointers to " + "function as callable arguments, execution will not be " + "offloaded.", + "warning"))) void + unsupported_callable_type() noexcept +{ +} + +template +inline constexpr + __attribute__((diagnose_if(true, + "HIP Standard Parallelism requires random access iterators, " + "execution will not be offloaded.", + "warning"))) void + unsupported_iterator_category() noexcept +{ +} +} +#endif // __HIPSTDPAR__ + +#endif // THRUST_SYSTEM_HIP_HIPSTDPAR_HIPSTD_HPP + diff --git a/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp b/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp new file mode 100644 index 000000000..46afd0ca8 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp @@ -0,0 +1,217 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/interpose_allocations.hpp + * \brief Interposed allocations/deallocations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) +#if defined(__HIPSTDPAR_INTERPOSE_ALLOC__) +#include + +#include +#include +#include +#include +#include +#include + +namespace hipstd +{ +struct Header +{ + void* alloc_ptr; + std::size_t size; + std::size_t align; +}; + +inline std::pmr::synchronized_pool_resource heap { + std::pmr::pool_options {0u, 15u * 1024u}, []() { + static class final : public std::pmr::memory_resource + { + // TODO: add exception handling + void* do_allocate(std::size_t n, std::size_t a) override + { + void* r {}; + hipMallocManaged(&r, n); + + return r; + } + + void do_deallocate(void* p, std::size_t, std::size_t) override + { + hipFree(p); + } + + bool do_is_equal(const std::pmr::memory_resource& x) const noexcept override + { + return dynamic_cast(&x); + } + } r; + + return &r; + }()}; +} // Namespace hipstd. + +extern "C" inline __attribute__((used)) void* __hipstdpar_aligned_alloc(std::size_t a, + std::size_t n) +{ // TODO: tidy up, revert to using std. 
+ auto m = n + sizeof(hipstd::Header) + a - 1; + + auto r = hipstd::heap.allocate(m, a); + + if(!r) + return r; + + const auto h = static_cast(r) + 1; + const auto p = (reinterpret_cast(h) + a - 1) & -a; + reinterpret_cast(p)[-1] = {r, m, a}; + + return reinterpret_cast(p); +} + +extern "C" inline __attribute__((used)) void* __hipstdpar_malloc(std::size_t n) +{ + constexpr auto a = alignof(std::max_align_t); + + return __hipstdpar_aligned_alloc(a, n); +} + +extern "C" inline __attribute__((used)) void* __hipstdpar_calloc(std::size_t n, std::size_t sz) +{ + return std::memset(__hipstdpar_malloc(n * sz), 0, n * sz); +} + +extern "C" inline __attribute__((used)) int +__hipstdpar_posix_aligned_alloc(void** p, std::size_t a, std::size_t n) +{ // TODO: check invariants on alignment + if(!p || n == 0) + return 0; + + *p = __hipstdpar_aligned_alloc(a, n); + + return 1; +} + +extern "C" __attribute__((weak)) void __hipstdpar_hidden_free(void*); + +extern "C" inline __attribute__((used)) void* __hipstdpar_realloc(void* p, std::size_t n) +{ + auto q = std::memcpy(__hipstdpar_malloc(n), p, n); + + auto h = static_cast(p) - 1; + + hipPointerAttribute_t tmp {}; + auto r = hipPointerGetAttributes(&tmp, h); + + if(!tmp.isManaged) + __hipstdpar_hidden_free(p); + else + hipstd::heap.deallocate(h->alloc_ptr, h->size, h->align); + + return q; +} + +extern "C" inline __attribute__((used)) void* +__hipstdpar_realloc_array(void* p, std::size_t n, std::size_t sz) +{ // TODO: handle overflow in n * sz gracefully, as per spec. 
+ return __hipstdpar_realloc(p, n * sz); +} + +extern "C" inline __attribute__((used)) void __hipstdpar_free(void* p) +{ + auto h = static_cast(p) - 1; + + hipPointerAttribute_t tmp {}; + auto r = hipPointerGetAttributes(&tmp, h); + + if(!tmp.isManaged) + return __hipstdpar_hidden_free(p); + + return hipstd::heap.deallocate(h->alloc_ptr, h->size, h->align); +} + +extern "C" inline __attribute__((used)) void* __hipstdpar_operator_new_aligned(std::size_t n, + std::size_t a) +{ + if(auto p = __hipstdpar_aligned_alloc(a, n)) + return p; + + throw std::runtime_error {"Failed __hipstdpar_operator_new_aligned"}; +} + +extern "C" inline __attribute__((used)) void* __hipstdpar_operator_new(std::size_t n) +{ // TODO: consider adding the special handling for operator new + return __hipstdpar_operator_new_aligned(n, alignof(std::max_align_t)); +} + +extern "C" inline __attribute__((used)) void* +__hipstdpar_operator_new_nothrow(std::size_t n, std::nothrow_t) noexcept +{ + try + { + return __hipstdpar_operator_new(n); + } + catch(...) + { + // TODO: handle the potential exception + } +} + +extern "C" inline __attribute__((used)) void* +__hipstdpar_operator_new_aligned_nothrow(std::size_t n, std::size_t a, std::nothrow_t) noexcept +{ // TODO: consider adding the special handling for operator new + try + { + return __hipstdpar_operator_new_aligned(n, a); + } + catch(...) + { + // TODO: handle the potential exception. 
+ } +} + +extern "C" inline __attribute__((used)) void +__hipstdpar_operator_delete_aligned_sized(void* p, std::size_t n, std::size_t a) noexcept +{ + hipPointerAttribute_t tmp {}; + auto r = hipPointerGetAttributes(&tmp, p); + + if(!tmp.isManaged) + return __hipstdpar_hidden_free(p); + + return hipstd::heap.deallocate(p, n, a); +} + +extern "C" inline __attribute__((used)) void __hipstdpar_operator_delete(void* p) noexcept +{ + return __hipstdpar_free(p); +} + +extern "C" inline __attribute__((used)) void +__hipstdpar_operator_delete_aligned(void* p, std::size_t) noexcept +{ // TODO: use alignment + return __hipstdpar_free(p); +} + +extern "C" inline __attribute__((used)) void +__hipstdpar_operator_delete_sized(void* p, std::size_t n) noexcept +{ + return __hipstdpar_operator_delete_aligned_sized(p, n, alignof(std::max_align_t)); +} +#endif // __HIPSTDPAR_INTERPOSE_ALLOC__ +#endif // __HIPSTDPAR__ + diff --git a/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp new file mode 100644 index 000000000..f320c2d63 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp @@ -0,0 +1,138 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/lexicographical_comparison.hpp + * \brief Lexicographical comparison operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN LEXICOGRAPHICAL_COMPARE + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool lexicographical_compare( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + if (f0 == l0) return f1 != l1; + if (f1 == l1) return false; + + const auto n0 = l0 - f0; + const auto n1 = l1 - f1; + const auto n = ::std::min(n0, n1); + + const auto m = ::thrust::mismatch(::thrust::device, f0, f0 + n, f1); + + if (m.first == f0 + n) return n0 < n1; + + return *m.first < *m.second; + } + + template< + typename I0, + typename I1, + enable_if_t()>* = nullptr> + inline + bool lexicographical_compare( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::lexicographical_compare( + ::std::execution::par, f0, l0, f1, l1); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool lexicographical_compare( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + if (f0 == l0) return f1 != l1; + if (f1 == l1) return false; + + const auto n0 = l0 - f0; + const auto n1 = l1 - f1; + const auto n = ::std::min(n0, n1); + + const auto m = ::thrust::mismatch( + ::thrust::device, + f0, + f0 + n, + f1, + [=](auto&& x, auto&& y) { return !r(x, y) && !r(y, x); }); + + if (m.first == f0 + n) return n0 < n1; + + return r(*m.first, *m.second); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool 
lexicographical_compare( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::lexicographical_compare( + ::std::execution::par, f0, l0, f1, l1, ::std::move(r)); + } + // END LEXICOGRAPHICAL_COMPARE +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/merge.hpp b/thrust/system/hip/hipstdpar/impl/merge.hpp new file mode 100644 index 000000000..bdae50ca8 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/merge.hpp @@ -0,0 +1,135 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/merge.hpp + * \brief Merge operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN MERGE + template< + typename I0, + typename I1, + typename O, + enable_if_t< + ::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O merge( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + O fo) + { + return ::thrust::merge(::thrust::device, f0, l0, f1, l1, fo); + } + + template< + typename I0, + typename I1, + typename O, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O merge( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::merge(::std::execution::par, f0, l0, f1, l1, fo); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O merge( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + O fo, + R r) + { + return ::thrust::merge( + ::thrust::device, f0, l0, f1, l1, fo, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O merge( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + O fo, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + 
return ::std::merge( + ::std::execution::par, f0, l0, f1, l1, fo, ::std::move(r)); + } + // END MERGE + + // BEGIN INPLACE_MERGE + // TODO: UNIMPLEMENTED IN THRUST + // END INPLACE_MERGE +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/min_max.hpp b/thrust/system/hip/hipstdpar/impl/min_max.hpp new file mode 100644 index 000000000..2fcdd6744 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/min_max.hpp @@ -0,0 +1,213 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/min_max.hpp + * \brief Minimum/maximum operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN MAX_ELEMENT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I max_element(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::max_element(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + I max_element(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::max_element(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + I max_element(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return + ::thrust::max_element(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + I max_element(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::max_element(::std::execution::par, f, l, ::std::move(r)); + } + // END MAX_ELEMENT + + // BEGIN MIN_ELEMENT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I min_element(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::min_element(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + I min_element(execution::parallel_unsequenced_policy, I f, I l) + { + 
::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::min_element(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + I min_element(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return + ::thrust::min_element(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + I min_element(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::min_element(::std::execution::par, f, l, ::std::move(r)); + } + // END MIN_ELEMENT + + // BEGIN MINMAX_ELEMENT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + pair minmax_element( + execution::parallel_unsequenced_policy, I f, I l) + { + auto [m, M] = ::thrust::minmax_element(::thrust::device, f, l); + + return {::std::move(m), ::std::move(M)}; + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + pair minmax_element( + execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::minmax_element(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + pair minmax_element( + execution::parallel_unsequenced_policy, I f, I l, R r) + { + auto [m, M] = ::thrust::minmax_element( + ::thrust::device, f, l, 
::std::move(r)); + + return {::std::move(m), ::std::move(M)}; + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + pair minmax_element( + execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::minmax_element( + ::std::execution::par, f, l, ::std::move(r)); + } + // END MINMAX_ELEMENT +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/numeric.hpp b/thrust/system/hip/hipstdpar/impl/numeric.hpp new file mode 100644 index 000000000..c887a25a6 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/numeric.hpp @@ -0,0 +1,795 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/numeric.hpp + * \brief Numeric operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN ADJACENT_DIFFERENCE + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O adjacent_difference( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::adjacent_difference(::thrust::device, fi, li, fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O adjacent_difference( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return + ::std::adjacent_difference(::std::execution::par, fi, li, fo); + } + + + template< + typename I, + typename O, + typename Op, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O adjacent_difference( + execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) + { + return ::thrust::adjacent_difference( + ::thrust::device, fi, li, fo, ::std::move(op)); + } + + template< + typename I, + typename O, + typename Op, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O adjacent_difference( + execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::adjacent_difference( + ::std::execution::par, fi, li, fo, ::std::move(op)); + } + // END ADJACENT_DIFFERENCE + + // BEGIN REDUCE + 
template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + typename iterator_traits::value_type reduce( + execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::reduce(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + typename iterator_traits::value_type reduce( + execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::reduce(::std::execution::par, f, l); + } + + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + T reduce(execution::parallel_unsequenced_policy, I f, I l, T x) + { + return ::thrust::reduce(::thrust::device, f, l, ::std::move(x)); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + T reduce(execution::parallel_unsequenced_policy, I f, I l, T x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::reduce(::std::execution::par, f, l, ::std::move(x)); + } + + template< + typename I, + typename T, + typename Op, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + T reduce(execution::parallel_unsequenced_policy, I f, I l, T x, Op op) + { + return ::thrust::reduce( + ::thrust::device, f, l, ::std::move(x), ::std::move(op)); + } + + template< + typename I, + typename T, + typename Op, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + T reduce(execution::parallel_unsequenced_policy, I f, I l, T x, Op op) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + 
return ::std::reduce( + ::std::execution::par, f, l, ::std::move(x), ::std::move(op)); + } + // END REDUCE + + // BEGIN EXCLUSIVE_SCAN + template< + typename I, + typename O, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O exclusive_scan( + execution::parallel_unsequenced_policy, I fi, I li, O fo, T x) + { + return ::thrust::exclusive_scan( + ::thrust::device, fi, li, fo, ::std::move(x)); + } + + template< + typename I, + typename O, + typename T, + enable_if_t()>* = nullptr> + inline + O exclusive_scan( + execution::parallel_unsequenced_policy, I fi, I li, O fo, T x) + { + ::hipstd::unsupported_iterator_category< + typename std::iterator_traits::iterator_category, + typename std::iterator_traits::iterator_category>(); + + return ::std::exclusive_scan( + ::std::execution::par, fi, li, fo, ::std::move(x)); + } + + template< + typename I, + typename O, + typename T, + typename Op, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O exclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + T x, + Op op) + { + return ::thrust::exclusive_scan( + ::thrust::device, fi, li, fo, ::std::move(x), ::std::move(op)); + } + + template< + typename I, + typename O, + typename T, + typename Op, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O exclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + T x, + Op op) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::exclusive_scan( + ::std::execution::par, + fi, + li, + fo, + ::std::move(x), + ::std::move(op)); + } + // END EXCLUSIVE_SCAN 
+
+    // BEGIN INCLUSIVE_SCAN
+    // Operator-less overload, offloaded through Thrust when both iterator
+    // types are offloadable. NOTE: this overload takes no initial value, so
+    // it must not declare a `typename T` parameter: a template parameter
+    // that appears in no function argument cannot be deduced, which would
+    // remove the overload from consideration for every ordinary call.
+    template<
+        typename I,
+        typename O,
+        enable_if_t<::hipstd::is_offloadable_iterator<I, O>()>* = nullptr>
+    inline
+    O inclusive_scan(
+        execution::parallel_unsequenced_policy, I fi, I li, O fo)
+    {
+        return ::thrust::inclusive_scan(::thrust::device, fi, li, fo);
+    }
+
+    // Operator-less fallback for non-offloadable iterators: calls the
+    // hipstd helper for the unsupported categories (defined in hipstd.hpp,
+    // not shown here) and runs the standard parallel algorithm instead.
+    template<
+        typename I,
+        typename O,
+        enable_if_t<!::hipstd::is_offloadable_iterator<I, O>()>* = nullptr>
+    inline
+    O inclusive_scan(
+        execution::parallel_unsequenced_policy, I fi, I li, O fo)
+    {
+        ::hipstd::unsupported_iterator_category<
+            typename iterator_traits<I>::iterator_category,
+            typename iterator_traits<O>::iterator_category>();
+
+        return ::std::inclusive_scan(::std::execution::par, fi, li, fo);
+    }
+
+    // Overload with a user-supplied binary operation, offloaded when both
+    // the iterators and the callable are offloadable.
+    template<
+        typename I,
+        typename O,
+        typename Op,
+        enable_if_t<
+            ::hipstd::is_offloadable_iterator<I, O>() &&
+            ::hipstd::is_offloadable_callable<Op>()>* = nullptr>
+    inline
+    O inclusive_scan(
+        execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op)
+    {
+        return ::thrust::inclusive_scan(
+            ::thrust::device, fi, li, fo, ::std::move(op));
+    }
+
+    // Fallback for the binary-operation overload: selected when either the
+    // iterators or the callable cannot be offloaded; each unsupported
+    // ingredient is passed to its hipstd helper before deferring to the
+    // standard parallel algorithm.
+    template<
+        typename I,
+        typename O,
+        typename Op,
+        enable_if_t<
+            !::hipstd::is_offloadable_iterator<I, O>() ||
+            !::hipstd::is_offloadable_callable<Op>()>* = nullptr>
+    inline
+    O inclusive_scan(
+        execution::parallel_unsequenced_policy, I fi, I li, O fo, Op op)
+    {
+        if constexpr (!::hipstd::is_offloadable_iterator<I, O>()) {
+            ::hipstd::unsupported_iterator_category<
+                typename iterator_traits<I>::iterator_category,
+                typename iterator_traits<O>::iterator_category>();
+        }
+        if constexpr (!::hipstd::is_offloadable_callable<Op>()) {
+            ::hipstd::unsupported_callable_type<Op>();
+        }
+
+        return ::std::inclusive_scan(
+            ::std::execution::par, fi, li, fo, ::std::move(op));
+    }
+
+    // Offloadable overload taking a binary operation and an initial value.
+    template<
+        typename I,
+        typename O,
+        typename Op,
+        typename T,
+        enable_if_t<
+            ::hipstd::is_offloadable_iterator<I, O>() &&
+            ::hipstd::is_offloadable_callable<Op>()>* = nullptr>
+    inline
+    O inclusive_scan(
+        execution::parallel_unsequenced_policy,
+        I fi,
+        I li,
+        O fo,
+        Op op,
+        T x)
+ { // TODO: this is highly inefficient due to rocThrust not exposing + // this particular interface where the user provides x. + if (fi == li) return fo; + + auto lo = + ::thrust::inclusive_scan(::thrust::device, fi, li, fo, op); + + return ::thrust::transform( + ::thrust::device, + fo, + lo, + fo, + [op = ::std::move(op), x = ::std::move(x)](auto&& y) { + return op(x, y); + }); + } + + template< + typename I, + typename O, + typename Op, + typename T, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O inclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + Op op, + T x) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::inclusive_scan( + ::std::execution::par, + fi, + li, + fo, + ::std::move(op), + ::std::move(x)); + } + // END INCLUSIVE_SCAN + + // BEGIN TRANSFORM_REDUCE + template< + typename I0, + typename I1, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + T x) + { + return ::thrust::inner_product( + ::thrust::device, f0, l0, f1, ::std::move(x)); + } + + template< + typename I0, + typename I1, + typename T, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + T x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::transform_reduce( + ::std::execution::par, f0, l0, f1, ::std::move(x)); + } + + template< + typename I0, + typename I1, + typename 
T, + typename Op0, + typename Op1, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + T x, + Op0 op0, + Op1 op1) + { + return ::thrust::inner_product( + ::thrust::device, + f0, + l0, + f1, + ::std::move(x), + ::std::move(op0), + ::std::move(op1)); + } + + template< + typename I0, + typename I1, + typename T, + typename Op0, + typename Op1, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + T x, + Op0 op0, + Op1 op1) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform_reduce( + ::std::execution::par, + f0, + l0, + f1, + ::std::move(x), + ::std::move(op0), + ::std::move(op1)); + } + + template< + typename I, + typename T, + typename Op0, + typename Op1, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I f, + I l, + T x, + Op0 op0, + Op1 op1) + { + return ::thrust::transform_reduce( + ::thrust::device, + f, + l, + ::std::move(op1), + ::std::move(x), + ::std::move(op0)); + } + + template< + typename I, + typename T, + typename Op0, + typename Op1, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + T transform_reduce( + execution::parallel_unsequenced_policy, + I f, + I l, + T x, + Op0 op0, + Op1 op1) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + 
::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform_reduce( + ::std::execution::par, + f, + l, + ::std::move(x), + ::std::move(op0), + ::std::move(op1)); + } + // END TRANSFORM_REDUCE + + // BEGIN TRANSFORM_EXCLUSIVE_SCAN + template< + typename I, + typename O, + typename T, + typename Op0, + typename Op1, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_exclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + T x, + Op0 op0, + Op1 op1) + { + return ::thrust::transform_exclusive_scan( + ::thrust::device, + fi, + li, + fo, + ::std::move(op1), + ::std::move(x), + ::std::move(op0)); + } + + template< + typename I, + typename O, + typename T, + typename Op0, + typename Op1, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_exclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + T x, + Op0 op0, + Op1 op1) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform_exclusive_scan( + ::std::execution::par, + fi, + li, + fo, + ::std::move(x), + ::std::move(op0), + ::std::move(op1)); + } + // END TRANSFORM_EXCLUSIVE_SCAN + + // BEGIN TRANSFORM_INCLUSIVE_SCAN + template< + typename I, + typename O, + typename Op0, + typename Op1, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_inclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + 
I li, + O fo, + Op0 op0, + Op1 op1) + { + return ::thrust::transform_inclusive_scan( + ::thrust::device, + fi, + li, + fo, + ::std::move(op1), + ::std::move(op0)); + } + + template< + typename I, + typename O, + typename Op0, + typename Op1, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_inclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + Op0 op0, + Op1 op1) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform_inclusive_scan( + ::std::execution::par, + fi, + li, + fo, + ::std::move(op0), + ::std::move(op1)); + } + + template< + typename I, + typename O, + typename Op0, + typename Op1, + typename T, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_inclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + Op0 op0, + Op1 op1, + T x) + { // TODO: this is inefficient. 
+ if (fi == li) return fo; + + auto lo = ::thrust::transform_inclusive_scan( + ::thrust::device, + fi, + li, + fo, + ::std::move(op1), + op0); + + return ::thrust::transform( + ::thrust::device, + fo, + lo, + fo, + [op0 = ::std::move(op0), x = ::std::move(x)](auto&& y) { + return op0(x, y); + }); + } + + template< + typename I, + typename O, + typename Op0, + typename Op1, + typename T, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform_inclusive_scan( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + Op0 op0, + Op1 op1, + T x) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform_inclusive_scan( + ::std::execution::par, + fi, + li, + fo, + ::std::move(op0), + ::std::move(op1), + ::std::move(x)); + } + // END TRANSFORM_INCLUSIVE_SCAN +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/order_changing.hpp b/thrust/system/hip/hipstdpar/impl/order_changing.hpp new file mode 100644 index 000000000..2ad9b559e --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/order_changing.hpp @@ -0,0 +1,101 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file thrust/system/hip/hipstdpar/include/order_changing.hpp + * \brief Order-changing operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN REVERSE + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void reverse(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::reverse(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + void reverse(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::reverse(::std::execution::par, f, l); + } + // END REVERSE + + // BEGIN REVERSE_COPY + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void reverse_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::reverse_copy(::thrust::device, fi, li, fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + void reverse_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::reverse_copy(::std::execution::par, fi, li, fo); + } + // END REVERSE_COPY + + // BEGIN ROTATE + // TODO: UNIMPLEMENTED IN THRUST + // END ROTATE + + // BEGIN ROTATE_COPY + // TODO: UNIMPLEMENTED IN THRUST + // END ROTATE_COPY + + // BEGIN SHIFT_LEFT + // TODO: UNIMPLEMENTED IN THRUST + // END SHIFT_LEFT + + // BEGIN SHIFT_RIGHT + // TODO: UNIMPLEMENTED IN THRUST + // END SHIFT_RIGHT +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/partitioning.hpp b/thrust/system/hip/hipstdpar/impl/partitioning.hpp new file mode 
100644 index 000000000..c4e74989b --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/partitioning.hpp @@ -0,0 +1,201 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/partitioning.hpp + * \brief Partitioning operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN IS_PARTITIONED + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool is_partitioned( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::is_partitioned( + ::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool is_partitioned( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::is_partitioned( + ::std::execution::par, f, l, ::std::move(p)); + } + // END IS_PARTITIONED + + // BEGIN PARTITION + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I partition(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::partition(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I partition(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return + ::std::partition(::std::execution::par, f, l, ::std::move(p)); + } + // END PARTITION + + // BEGIN PARTITION_COPY + template< + typename I, + typename O0, + typename O1, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair partition_copy( + execution::parallel_unsequenced_policy, + I f, + I l, + O0 fo0, + O1 fo1, + P p) + { + auto [r0, r1] = ::thrust::partition_copy( + ::thrust::device, f, l, fo0, fo1, ::std::move(p)); + + return {::std::move(r0), ::std::move(r1)}; + } + + template< + typename I, + typename O0, + typename O1, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair partition_copy( + execution::parallel_unsequenced_policy, + I f, + I l, + O0 fo0, + O1 fo1, + P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::partition_copy( + ::std::execution::par, f, l, fo0, fo1, ::std::move(p)); + } + // END PARTITION_COPY + + // BEGIN STABLE_PARTITION + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I stable_partition( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::stable_partition( + ::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I stable_partition( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::stable_partition( + ::std::execution::par, f, l, ::std::move(p)); + } + // END STABLE_PARTITION +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/removing.hpp b/thrust/system/hip/hipstdpar/impl/removing.hpp new file mode 100644 index 000000000..0427b3f90 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/removing.hpp @@ -0,0 +1,295 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/removing.hpp + * \brief Removing operations implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN REMOVE + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I remove(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + return ::thrust::remove(::thrust::device, f, l, x); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + I remove(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::remove(::std::execution::par, f, l, x); + } + // END REMOVE + + // BEGIN REMOVE_IF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I remove_if(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::remove_if(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I remove_if(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return + ::std::remove_if(::std::execution::par, f, l, ::std::move(p)); + } + // END REMOVE_IF + + // BEGIN REMOVE_COPY + template< + typename I, + typename O, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O remove_copy( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + const T& x) + { + return ::thrust::remove_copy(::thrust::device, fi, li, fo, x); + } + + template< + typename I, + typename O, + typename T, + enable_if_t()>* = nullptr> + inline + O remove_copy( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::remove_copy(::std::execution::par, fi, li, fo, x); + } + // END REMOVE_COPY + + // BEGIN REMOVE_COPY_IF + template< + typename I, + typename O, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + O remove_copy_if( + execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) + { + return ::thrust::remove_copy_if( + ::thrust::device, fi, li, fo, ::std::move(p)); + } + + template< + typename I, + typename O, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + O remove_copy_if( + execution::parallel_unsequenced_policy, I fi, I li, O fo, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::remove_copy_if( + ::std::execution::par, fi, li, fo, ::std::move(p)); + } + // END REMOVE_COPY_IF + + // BEGIN UNIQUE + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I unique(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::unique(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + I unique(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::unique(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + I unique(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return ::thrust::unique(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + I unique(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::unique(::std::execution::par, f, l, ::std::move(r)); + } + // END UNIQUE + + // BEGIN UNIQUE_COPY + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O unique_copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::unique_copy(::thrust::device, fi, li, fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O unique_copy(execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + 
::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::unique_copy(::std::execution::par, fi, li, fo); + } + + template< + typename I, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O unique_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo, R r) + { + return ::thrust::unique_copy( + ::thrust::device, fi, li, fo, ::std::move(r)); + } + + template< + typename I, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O unique_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo, R r) + { + + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::unique_copy( + ::std::execution::par, fi, li, fo, ::std::move(r)); + } + // END UNIQUE_COPY +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/search.hpp b/thrust/system/hip/hipstdpar/impl/search.hpp new file mode 100644 index 000000000..10383ac4d --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/search.hpp @@ -0,0 +1,683 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/search.hpp + * \brief Search operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN ALL_OF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool all_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::all_of(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool all_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::all_of(::std::execution::par, f, l, ::std::move(p)); + } + // END ALL_OF + + // BEGIN ANY_OF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool any_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::any_of(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool any_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::any_of(::std::execution::par, f, l, ::std::move(p)); + } + // END ANY_OF + + // BEGIN NONE_OF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool none_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::none_of(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + bool none_of(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::none_of(::std::execution::par, f, l, ::std::move(p)); + } + // END NONE_OF + + // BEGIN FIND + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I find(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + return ::thrust::find(::thrust::device, f, l, x); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + I find(execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::find(::std::execution::par, f, l, x); + } + // END FIND + + // BEGIN FIND_IF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I find_if(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::find_if(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I find_if(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::find_if(::std::execution::par, f, l, ::std::move(p)); + } + // END FIND_IF + + // BEGIN FIND_IF_NOT + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I find_if_not(execution::parallel_unsequenced_policy, I f, I l, P p) + { + return + ::thrust::find_if_not(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I find_if_not(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return + ::std::find_if_not(::std::execution::par, f, l, ::std::move(p)); + } + // END FIND_IF_NOT + + // BEGIN FIND_END + // TODO: UNIMPLEMENTED IN THRUST + // END FIND_END + + // BEGIN FIND_FIRST_OF + // TODO: UNIMPLEMENTED IN THRUST + // END FIND_FIRST_OF + + // BEGIN ADJACENT_FIND + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I adjacent_find(execution::parallel_unsequenced_policy, I f, I l) + { + if (f == l) return l; + + const auto r = ::thrust::mismatch( + ::thrust::device, f + 1, l, f, not_equal_to<>{}); + + return (r.first == l) ? l : r.second; + } + + template< + typename I, + // No predicate type here: this overload takes only the iterator pair. + enable_if_t()>* = nullptr> + inline + I adjacent_find(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::adjacent_find(::std::execution::par, f, l); + } + + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I adjacent_find(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if (f == l) return l; + + const auto r = ::thrust::mismatch( // p(*i, *(i + 1)) as in std + ::thrust::device, f, l - 1, f + 1, not_fn(::std::move(p))); + + return (r.second == l) ? l : r.first; + } + + template< + typename I, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + I adjacent_find(execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::adjacent_find( + ::std::execution::par, f, l, ::std::move(p)); + } + // END ADJACENT_FIND + + // BEGIN COUNT + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + typename iterator_traits::difference_type count( + execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + return ::thrust::count(::thrust::device, f, l, x); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + typename iterator_traits::difference_type count( + execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::count(::std::execution::par, f, l, x); + } + // END COUNT + + // BEGIN COUNT_IF + template< + typename I, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + typename iterator_traits::difference_type count_if( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + return ::thrust::count_if(::thrust::device, f, l, ::std::move(p)); + } + + template< + typename I, + // No output-iterator type here: count_if only reads [f, l). + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + typename iterator_traits::difference_type count_if( + execution::parallel_unsequenced_policy, I f, I l, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::count_if(::std::execution::par, f, l, ::std::move(p)); + } + // END COUNT_IF + + // BEGIN MISMATCH + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + auto [m0, m1] = ::thrust::mismatch(::thrust::device, f0, l0, f1); + + return {::std::move(m0), ::std::move(m1)}; + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::mismatch(::std::execution::par, f0, l0, f1); + } + + template< + typename I0, + typename I1, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, P p) + { + auto [m0, m1] = ::thrust::mismatch( + ::thrust::device, f0, l0, f1, ::std::move(p)); + + return {::std::move(m0), ::std::move(m1)}; + } + + template< + typename I0, + typename I1, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::mismatch( + ::std::execution::par, f0, l0, f1, ::std::move(p)); + } + + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + const auto n = ::std::min(l0 - f0, l1 - f1); + + auto [m0, m1] = + ::thrust::mismatch(::thrust::device, f0, f0 + n, f1); + + return {::std::move(m0), ::std::move(m1)}; + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::mismatch(::std::execution::par, f0, l0, f1, l1); + } + + template< + typename I0, + typename I1, + typename P, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + P p) + { + const auto n = ::std::min(l0 - f0, l1 - f1); + + auto [m0, m1] = ::thrust::mismatch( + ::thrust::device, f0, f0 + n, f1, ::std::move(p)); + + return {::std::move(m0), ::std::move(m1)}; + } + + template< + typename I0, + typename I1, + typename P, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + pair mismatch( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + P p) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::mismatch( + ::std::execution::par, f0, l0, f1, l1, ::std::move(p)); + } + // END MISMATCH + + // BEGIN EQUAL + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool equal(execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + return ::thrust::equal(::thrust::device, f0, l0, f1); + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool equal(execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::equal(::std::execution::par, f0, l0, f1); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, R r) + { + return + ::thrust::equal(::thrust::device, f0, l0, f1, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + return + ::std::equal(::std::execution::par, f0, l0, f1, ::std::move(r)); + } + + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + if (l0 - f0 != l1 - f1) 
return false; + + return ::thrust::equal(::thrust::device, f0, l0, f1); + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::equal(::std::execution::par, f0, l0, f1, l1); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + if (l0 - f0 != l1 - f1) return false; + + return ::thrust::equal( + ::thrust::device, f0, l0, f1, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool equal( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + return ::std::equal( + ::std::execution::par, f0, l0, f1, l1, ::std::move(r)); + } + // END EQUAL + + // BEGIN SEARCH + // TODO: UNIMPLEMENTED IN THRUST + // END SEARCH + + // BEGIN SEARCH_N + // TODO: UNIMPLEMENTED IN THRUST + // END SEARCH_N +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/set.hpp b/thrust/system/hip/hipstdpar/impl/set.hpp new file mode 100644 index 000000000..f28980361 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/set.hpp @@ -0,0 +1,514 @@ +/* + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/set.hpp + * \brief Set operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN INCLUDES + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool includes( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + ::thrust::discard_iterator<> cnt{0}; + + return ::thrust::set_difference( + ::thrust::device, f1, l1, f0, l0, cnt) == cnt; + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool includes( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1, I1 l1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::includes(::std::execution::par, f0, l0, f1, l1); + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool includes( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + ::thrust::discard_iterator<> cnt{0}; + + return ::thrust::set_difference( + ::thrust::device, f1, l1, f0, l0, 
cnt, ::std::move(r)) == cnt; + } + + template< + typename I0, + typename I1, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool includes( + execution::parallel_unsequenced_policy, + I0 f0, + I0 l0, + I1 f1, + I1 l1, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::includes( // forward both ranges in caller order + ::std::execution::par, f0, l0, f1, l1, ::std::move(r)); + } + // END INCLUDES + + // BEGIN SET_UNION + template< + typename I0, + typename I1, + typename O, + enable_if_t< + ::hipstd::is_offloadable_iterator>* = nullptr> + inline + O set_union( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + return + ::thrust::set_union(::thrust::device, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + enable_if_t< + !::hipstd::is_offloadable_iterator>* = nullptr> + inline + O set_union( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return + ::std::set_union(::std::execution::par, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_union( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + return ::thrust::set_union( + ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + + template< + typename
I0, + typename I1, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_union( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::set_union( + ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + // END SET_UNION + + // BEGIN SET_INTERSECTION + template< + typename I0, + typename I1, + typename O, + enable_if_t< + ::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_intersection( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + return ::thrust::set_intersection( + ::thrust::device, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_intersection( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::set_intersection( + ::std::execution::par, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_intersection( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + return ::thrust::set_intersection( + 
::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_intersection( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::set_intersection( + ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + // END SET_INTERSECTION + + // BEGIN SET_DIFFERENCE + template< + typename I0, + typename I1, + typename O, + enable_if_t< + ::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + return ::thrust::set_difference( + ::thrust::device, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::set_difference( + ::std::execution::par, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + 
I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + return ::thrust::set_difference( + ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::set_difference( + ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + // END SET_DIFFERENCE + + // BEGIN SET_SYMMETRIC_DIFFERENCE + template< + typename I0, + typename I1, + typename O, + enable_if_t< + ::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_symmetric_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + return ::thrust::set_symmetric_difference( + ::thrust::device, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O set_symmetric_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::set_symmetric_difference( + ::std::execution::par, fi0, li0, fi1, li1, fo); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + 
::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_symmetric_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + return ::thrust::set_symmetric_difference( + ::thrust::device, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + + template< + typename I0, + typename I1, + typename O, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O set_symmetric_difference( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + I1 li1, + O fo, + R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::set_symmetric_difference( + ::std::execution::par, fi0, li0, fi1, li1, fo, ::std::move(r)); + } + // END SET_SYMMETRIC_DIFFERENCE +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/sorting.hpp b/thrust/system/hip/hipstdpar/impl/sorting.hpp new file mode 100644 index 000000000..4cc5b3728 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/sorting.hpp @@ -0,0 +1,559 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file thrust/system/hip/hipstdpar/include/sorting.hpp + * \brief Sorting operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include + +#include +#include +#include + +namespace std +{ + // BEGIN SORT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void sort(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::sort(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + void sort(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::sort(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + void sort(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return ::thrust::sort(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + void sort(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::sort(::std::execution::par, f, l, ::std::move(r)); + } + // END SORT + + // BEGIN STABLE_SORT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void stable_sort(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::stable_sort(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + 
void stable_sort(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::stable_sort(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + void stable_sort(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return + ::thrust::stable_sort(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + void stable_sort(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::stable_sort(::std::execution::par, f, l, ::std::move(r)); + } + // END STABLE_SORT + + // BEGIN PARTIAL_SORT + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last, + CompareOp compare_op) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + std::partial_sort(std::execution::par, first, middle, last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const 
size_t n = static_cast(thrust::distance(first, middle)); + + if(count == 0 || n == 0) + { + return; + } + + const size_t n_index = n - 1; + + auto policy = thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::partial_sort( + nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); + + // Allocate temporary storage. + thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::partial_sort( + ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "partial_sort: failed to synchronize"); + } + + template ()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt middle, + KeysIt last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + + std::partial_sort(std::execution::par, first, middle, last); + } + + template ()>* = nullptr> + inline void partial_sort(execution::parallel_unsequenced_policy policy, + KeysIt first, + KeysIt middle, + KeysIt last) + { + typedef typename thrust::iterator_value::type item_type; + std::partial_sort(policy, first, middle, last, thrust::less()); + } + // END PARTIAL_SORT + + // BEGIN PARTIAL_SORT_COPY + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last, + CompareOp compare_op) + { + if 
constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + std::partial_sort_copy(std::execution::par, first, last, d_first, d_last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const size_t d_count = static_cast(thrust::distance(d_first, d_last)); + + if(count == 0 || d_count == 0) + { + return; + } + + const size_t d_index = d_count - 1; + + auto policy = thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::partial_sort_copy(nullptr, + storage_size, + first, + d_first, + d_index, + count, + compare_op, + stream, + debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::partial_sort_copy( + ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "partial_sort_copy: failed to synchronize"); + } + + template ()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + + std::partial_sort_copy(std::execution::par, first, last, d_first, d_last); + } + + template ()>* = nullptr> + inline void partial_sort_copy(execution::parallel_unsequenced_policy policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last) + { + typedef typename thrust::iterator_value::type item_type; + std::partial_sort_copy(policy, first, last, d_first, d_last, thrust::less()); + } + // END PARTIAL_SORT_COPY + + // BEGIN IS_SORTED + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + bool is_sorted(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::is_sorted(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + bool is_sorted(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::is_sorted(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool 
is_sorted(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return ::thrust::is_sorted(::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + bool is_sorted(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return + ::std::is_sorted(::std::execution::par, f, l, ::std::move(r)); + } + // END IS_SORTED + + // BEGIN IS_SORTED_UNTIL + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l) + { + return ::thrust::is_sorted_until(::thrust::device, f, l); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::is_sorted_until(::std::execution::par, f, l); + } + + template< + typename I, + typename R, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l, R r) + { + return ::thrust::is_sorted_until( + ::thrust::device, f, l, ::std::move(r)); + } + + template< + typename I, + typename R, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + I is_sorted_until(execution::parallel_unsequenced_policy, I f, I l, R r) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename 
iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::is_sorted_until( + ::std::execution::par, f, l, ::std::move(r)); + } + // END IS_SORTED_UNTIL + + // BEGIN NTH_ELEMENT + template () + || !hipstd::is_offloadable_callable()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt nth, + KeysIt last, + CompareOp compare_op) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr(!hipstd::is_offloadable_callable()) + { + hipstd::unsupported_callable_type(); + } + + std::nth_element(std::execution::par, first, nth, last, std::move(compare_op)); + } + + template () + && hipstd::is_offloadable_callable()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy, + KeysIt first, + KeysIt nth, + KeysIt last, + CompareOp compare_op) + { + const size_t count = static_cast(thrust::distance(first, last)); + const size_t n = static_cast(thrust::distance(first, nth)); + + if(count == 0) + { + return; + } + + auto policy = thrust::device; + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::nth_element( + nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp( + policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::nth_element( + ptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), + "nth_element: failed to synchronize"); + } + + template ()>* = nullptr> + inline void + nth_element(execution::parallel_unsequenced_policy, KeysIt first, KeysIt nth, KeysIt last) + { + if constexpr(!hipstd::is_offloadable_iterator()) + { + hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + + std::nth_element(std::execution::par, first, nth, last); + } + + template ()>* = nullptr> + inline void nth_element(execution::parallel_unsequenced_policy policy, + KeysIt first, + KeysIt nth, + KeysIt last) + { + typedef typename thrust::iterator_value::type item_type; + std::nth_element(policy, first, nth, last, thrust::less()); + } + // END NTH_ELEMENT +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/swap.hpp b/thrust/system/hip/hipstdpar/impl/swap.hpp new file mode 100644 index 000000000..a9bd8d5dc --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/swap.hpp @@ -0,0 +1,63 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file thrust/system/hip/hipstdpar/include/swap.hpp + * \brief Swap operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN SWAP_RANGES + template< + typename I0, + typename I1, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I1 swap_ranges( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + return ::thrust::swap_ranges(::thrust::device, f0, l0, f1); + } + + template< + typename I0, + typename I1, + enable_if_t< + !::hipstd::is_offloadable_iterator()>* = nullptr> + inline + I1 swap_ranges( + execution::parallel_unsequenced_policy, I0 f0, I0 l0, I1 f1) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::swap_ranges(::std::execution::par, f0, l0, f1); + } + // END SWAP_RANGES +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/transformation.hpp b/thrust/system/hip/hipstdpar/impl/transformation.hpp new file mode 100644 index 000000000..7525f41c2 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/transformation.hpp @@ -0,0 +1,296 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! 
\file thrust/system/hip/hipstdpar/include/transformation.hpp + * \brief Transformation operations implementation detail header for HIPSTDPAR. + */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN TRANSFORM + template< + typename I, + typename O, + typename F, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform( + execution::parallel_unsequenced_policy, I fi, I li, O fo, F fn) + { + return ::thrust::transform( + ::thrust::device, fi, li, fo, ::std::move(fn)); + } + + template< + typename I, + typename O, + typename F, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform( + execution::parallel_unsequenced_policy, I fi, I li, O fo, F fn) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform( + ::std::execution::par, fi, li, fo, ::std::move(fn)); + } + + template< + typename I0, + typename I1, + typename O, + typename F, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + O fo, + F fn) + { + return ::thrust::transform( + ::thrust::device, fi0, li0, fi1, fo, ::std::move(fn)); + } + + template< + typename I0, + typename I1, + typename O, + typename F, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable()>* = nullptr> + inline + O transform( + execution::parallel_unsequenced_policy, + I0 fi0, + I0 li0, + I1 fi1, + O fo, + F fn) + { + if 
constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable()) { + ::hipstd::unsupported_callable_type(); + } + + return ::std::transform( + ::std::execution::par, fi0, li0, fi1, fo, ::std::move(fn)); + } + // END TRANSFORM + + // BEGIN REPLACE + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void replace( + execution::parallel_unsequenced_policy, + I f, + I l, + const T& x, + const T& y) + { + return ::thrust::replace(::thrust::device, f, l, x, y); + } + + template< + typename I, + typename T, + enable_if_t()>* = nullptr> + inline + void replace( + execution::parallel_unsequenced_policy, + I f, + I l, + const T& x, + const T& y) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::replace(::std::execution::par, f, l, x, y); + } + // END REPLACE + + // BEGIN REPLACE_IF + template< + typename I, + typename P, + typename T, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + void replace_if( + execution::parallel_unsequenced_policy, I f, I l, P p, const T& x) + { + return + ::thrust::replace_if(::thrust::device, f, l, ::std::move(p), x); + } + + template< + typename I, + typename P, + typename T, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + void replace_if( + execution::parallel_unsequenced_policy, I f, I l, P p, const T& x) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::replace_if( + ::std::execution::par, f, l, ::std::move(p), x); + } + // END REPLACE_IF + + // BEGIN REPLACE_COPY + template< + typename I, + typename O, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void replace_copy( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + const T& x, + const T& y) + { + return ::thrust::replace_copy(::thrust::device, fi, li, fo, x, y); + } + + template< + typename I, + typename O, + typename T, + enable_if_t()>* = nullptr> + inline + void replace_copy( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + const T& x, + const T& y) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::replace_copy(::std::execution::par, fi, li, fo, x, y); + } + // END REPLACE_COPY + + // BEGIN REPLACE_COPY_IF + template< + typename I, + typename O, + typename P, + typename T, + enable_if_t< + ::hipstd::is_offloadable_iterator() && + ::hipstd::is_offloadable_callable

()>* = nullptr> + inline + void replace_copy_if( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + P p, + const T& x) + { + return ::thrust::replace_copy_if( + ::thrust::device, fi, li, fo, ::std::move(p), x); + } + + template< + typename I, + typename O, + typename P, + typename T, + enable_if_t< + !::hipstd::is_offloadable_iterator() || + !::hipstd::is_offloadable_callable

()>* = nullptr> + inline + void replace_copy_if( + execution::parallel_unsequenced_policy, + I fi, + I li, + O fo, + P p, + const T& x) + { + if constexpr (!::hipstd::is_offloadable_iterator()) { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + } + if constexpr (!::hipstd::is_offloadable_callable

()) { + ::hipstd::unsupported_callable_type

(); + } + + return ::std::replace_copy_if( + ::std::execution::par, fi, li, fo, ::std::move(p), x); + } + // END REPLACE_COPY_IF +} +#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/uninitialized.hpp b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp new file mode 100644 index 000000000..e0e5f1d82 --- /dev/null +++ b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp @@ -0,0 +1,389 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*! \file thrust/system/hip/hipstdpar/include/uninitialized.hpp + * \brief Operations on uninitialized memory implementation detail header for HIPSTDPAR. 
+ */ + +#pragma once + +#if defined(__HIPSTDPAR__) + +#include "hipstd.hpp" + +#include +#include +#include +#include + +#include +#include +#include + +namespace std +{ + // BEGIN UNINITIALIZED_COPY + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O uninitialized_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::uninitialized_copy(::thrust::device, fi, li, fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O uninitialized_copy( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_copy(::std::execution::par, fi, li, fo); + } + // END UNINITIALIZED_COPY + + // BEGIN UNINITIALIZED_COPY_N + template< + typename I, + typename N, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O uninitialized_copy_n( + execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + return ::thrust::uninitialized_copy_n(::thrust::device, fi, n, fo); + } + + template< + typename I, + typename N, + typename O, + enable_if_t()>* = nullptr> + inline + O uninitialized_copy_n( + execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return + ::std::uninitialized_copy_n(::std::execution::par, fi, n, fo); + } + // END UNINITIALIZED_COPY_N + + // BEGIN UNINITIALIZED_FILL + template< + typename I, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_fill( + execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + return ::thrust::uninitialized_fill(::thrust::device, f, l, x); + } + + template< + typename I, + typename T, + 
enable_if_t()>* = nullptr> + inline + void uninitialized_fill( + execution::parallel_unsequenced_policy, I f, I l, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::offload_category>(); + + return ::std::uninitialized_fill(::std::execution::par, f, l, x); + } + // END UNINITIALIZED_FILL + + // BEGIN UNINITIALIZED_FILL_N + template< + typename I, + typename N, + typename T, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_fill( + execution::parallel_unsequenced_policy, I f, N n, const T& x) + { + return ::thrust::uninitialized_fill_n(::thrust::device, f, n, x); + } + + template< + typename I, + typename N, + typename T, + enable_if_t()>* = nullptr> + inline + void uninitialized_fill( + execution::parallel_unsequenced_policy, I f, N n, const T& x) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_fill_n(::std::execution::par, f, n, x); + } + // END UNINITIALIZED_FILL_N + + // BEGIN UNINITIALIZED_MOVE + template< + typename I, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O uninitialized_move( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + return ::thrust::uninitialized_copy( + ::thrust::device, + make_move_iterator(fi), + make_move_iterator(li), + fo); + } + + template< + typename I, + typename O, + enable_if_t()>* = nullptr> + inline + O uninitialized_move( + execution::parallel_unsequenced_policy, I fi, I li, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_move(::std::execution::par, fi, li, fo); + } + // END UNINITIALIZED_MOVE + + // BEGIN UNINITIALIZED_MOVE_N + template< + typename I, + typename N, + typename O, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + O uninitialized_move_n( + 
execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + return ::thrust::uninitialized_copy_n( + ::thrust::device, make_move_iterator(fi), n, fo); + } + + template< + typename I, + typename N, + typename O, + enable_if_t()>* = nullptr> + inline + O uninitialized_move_n( + execution::parallel_unsequenced_policy, I fi, N n, O fo) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category, + typename iterator_traits::iterator_category>(); + + return + ::std::uninitialized_move_n(::std::execution::par, fi, n, fo); + } + // END UNINITIALIZED_MOVE_N + + // BEGIN UNINITIALIZED_DEFAULT_CONSTRUCT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_default_construct( + execution::parallel_unsequenced_policy, I f, I l) + { + ::thrust::for_each(::thrust::device, f, l, [](auto& x) { + auto p = const_cast( + static_cast((addressof(x)))); + ::new (p) typename iterator_traits::value_type; + }); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + void uninitialized_default_construct( + execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_default_construct( + ::std::execution::par, f, l); + } + // END UNINITIALIZED_DEFAULT_CONSTRUCT + + // BEGIN UNINITIALIZED_DEFAULT_CONSTRUCT_N + template< + typename I, + typename N, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_default_construct_n( + execution::parallel_unsequenced_policy, I f, N n) + { + ::thrust::for_each_n(::thrust::device, f, n, [](auto& x) { + auto p = const_cast( + static_cast((addressof(x)))); + ::new (p) typename iterator_traits::value_type; + }); + } + + template< + typename I, + typename N, + enable_if_t()>* = nullptr> + inline + void uninitialized_default_construct_n( + execution::parallel_unsequenced_policy, I f, N n) + { + 
::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_default_construct_n( + ::std::execution::par, f, n); + } + // END UNINITIALIZED_DEFAULT_CONSTRUCT_N + + // BEGIN UNINITIALIZED_VALUE_CONSTRUCT + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_value_construct( + execution::parallel_unsequenced_policy, I f, I l) + { + ::thrust::for_each(::thrust::device, f, l, [](auto& x) { + auto p = const_cast( + static_cast((addressof(x)))); + ::new (p) typename iterator_traits::value_type{}; + }); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + void uninitialized_value_construct( + execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_value_construct( + ::std::execution::par, f, l); + } + // END UNINITIALIZED_VALUE_CONSTRUCT + + // BEGIN UNINITIALIZED_VALUE_CONSTRUCT_N + template< + typename I, + typename N, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void uninitialized_value_construct_n( + execution::parallel_unsequenced_policy, I f, N n) + { + ::thrust::for_each_n(::thrust::device, f, n, [](auto& x) { + auto p = const_cast( + static_cast((addressof(x)))); + ::new (p) typename iterator_traits::value_type{}; + }); + } + + template< + typename I, + typename N, + enable_if_t()>* = nullptr> + inline + void uninitialized_value_construct_n( + execution::parallel_unsequenced_policy, I f, N n) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::uninitialized_value_construct_n( + ::std::execution::par, f, n); + } + // END UNINITIALIZED_VALUE_CONSTRUCT_N + + // BEGIN DESTROY + template< + typename I, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void 
destroy(execution::parallel_unsequenced_policy, I f, I l) + { + ::thrust::for_each(f, l, [](auto& x) { destroy_at(addressof(x)); }); + } + + template< + typename I, + enable_if_t()>* = nullptr> + inline + void destroy(execution::parallel_unsequenced_policy, I f, I l) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::destroy(::std::execution::par, f, l); + } + // END DESTROY + + // BEGIN DESTROY_N + template< + typename I, + typename N, + enable_if_t<::hipstd::is_offloadable_iterator()>* = nullptr> + inline + void destroy_n(execution::parallel_unsequenced_policy, I f, N n) + { + ::thrust::for_each_n(f, n, [](auto& x) { + destroy_at(addressof(x)); + }); + } + + template< + typename I, + typename N, + enable_if_t()>* = nullptr> + inline + void destroy_n(execution::parallel_unsequenced_policy, I f, N n) + { + ::hipstd::unsupported_iterator_category< + typename iterator_traits::iterator_category>(); + + return ::std::destroy_n(::std::execution::par, f, n); + } + // END DESTROY_N +} +#endif // __HIPSTDPAR__ From e2d548f6fbc35196902ac43f9b30018e8c280f64 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Fri, 23 Aug 2024 08:14:15 +0000 Subject: [PATCH 38/44] Added relevant information to README and CHANGELOG regarding HIPSTDPAR --- CHANGELOG.md | 5 +++++ README.md | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 046ce2c53..c2cbb1b04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ Documentation for rocThrust available at * Merged changes from upstream CCCL/thrust 2.4.0 +### Changes + +* Split the contents of HIPSTDPAR's forwarding header into several implementation headers. 
+ ## (Unreleased) rocThrust 3.2.0 for ROCm 6.3 ### Additions @@ -33,6 +37,7 @@ Documentation for rocThrust available at * Merged changes from upstream CCCL/thrust 2.2.0 * Updated the contents of `system/hip` and `test` with the upstream changes to `system/cuda` and `testing` +* Added HIPSTDPAR library as part of rocThrust. ### Changes diff --git a/README.md b/README.md index 09017d867..9f9eee6fb 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ works on HIP and ROCm software. Currently there is no CUDA backend in place. Software requirements include: -* CMake (3.5.1 or later) +* CMake (3.10.2 or later) * AMD [ROCm](https://rocm.docs.amd.com) Software (1.8.0 or later) * Including the [HipCC](https://github.com/ROCm/HIP) compiler, which must be set as your C++ compiler for ROCm @@ -266,6 +266,39 @@ make -j4 ./benchmarks/benchmark_thrust_bench ``` +## HIPSTDPAR +rocThrust also hosts the header files for [HIPSTDPAR](https://rocm.blogs.amd.com/software-tools-optimization/hipstdpar/README.html#c-17-parallel-algorithms-and-hipstdpar). +Within these headers, a great part of the C++ Standard Library parallel algorithms are overloaded so that rocThrust's and rocPRIM's implementations of those algorithms are used when they are invoked with the `parallel_unsequenced_policy` policy. +When compiling with the proper flags (see [LLVM (AMD's fork) docs](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#implementation-driver) for the complete list), the HIPSTDPAR headers are implicitly included by the compiler, and therefore the execution of these parallel algorithms will be offloaded to AMD devices. + +### Install +HIPSTDPAR is currently packaged along rocThrust. The `hipstdpar` package is set up as a virtual package provided by `rocthrust`, so the latter needs to be installed entirely for getting HIPSTDPAR's headers. Conversely, installing the `rocthrust` package will also include HIPSTDPAR's headers in the system. 
+ +### Tests +rocThrust also includes some tests for checking the correct building of HIPSTDPAR implementations. These are located under the [test/hipstdpar](/test/hipstdpar/) folder. When configuring the project with the `BUILD_TEST` option on, these tests will also be enabled. Additionally, one can configure **only** HIPSTDPAR's tests by disabling `BUILD_TEST` and enabling `BUILD_HIPSTDPAR_TEST`. In general, the following steps can be followed for building and running the tests: + +```sh +git clone https://github.com/ROCm/rocThrust + +# Go to rocThrust directory, create and go to the build directory. +cd rocThrust; mkdir build; cd build + +# Configure rocThrust. +[CXX=hipcc] cmake ../. -D BUILD_TEST=ON # Configure rocThrust's and HIPSTDPAR's tests. +[CXX=hipcc] cmake ../. -D BUILD_TEST=OFF -D BUILD_HIPSTDPAR_TEST=ON # Only configure HIPSTDPAR's tests. + +# Build +make -j4 + +# Run tests. +ctest --output-on-failure +``` + +#### Requirements +* [rocPRIM](https://github.com/ROCm/rocPRIM) and [rocThrust](https://github.com/ROCm/rocThrust) libraries +* [TBB](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onetbb.html) library +* CMake (3.10.2 or later) + ## Support You can report bugs and feature requests through the GitHub From 28da1b168817087aa2ad36d08063a611b163c821 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Fri, 13 Sep 2024 14:31:33 +0000 Subject: [PATCH 39/44] Clarified upstream LLVM offload support --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9f9eee6fb..adad3247c 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,9 @@ make -j4 ## HIPSTDPAR rocThrust also hosts the header files for [HIPSTDPAR](https://rocm.blogs.amd.com/software-tools-optimization/hipstdpar/README.html#c-17-parallel-algorithms-and-hipstdpar).
Within these headers, a great part of the C++ Standard Library parallel algorithms are overloaded so that rocThrust's and rocPRIM's implementations of those algorithms are used when they are invoked with the `parallel_unsequenced_policy` policy. -When compiling with the proper flags (see [LLVM (AMD's fork) docs](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#implementation-driver) for the complete list), the HIPSTDPAR headers are implicitly included by the compiler, and therefore the execution of these parallel algorithms will be offloaded to AMD devices. +When compiling with the proper flags (see [LLVM (AMD's fork) docs](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#implementation-driver)[^1] for the complete list), the HIPSTDPAR headers are implicitly included by the compiler, and therefore the execution of these parallel algorithms will be offloaded to AMD devices. + +[^1]: Although currently only AMD's fork of LLVM contains the docs for the [C++ Standard Parallelism Offload Support](https://github.com/ROCm/llvm-project/blob/rocm-6.2.x/clang/docs/HIPSupport.rst#c-standard-parallelism-offload-support-compiler-and-runtime), both of them (the upstream LLVM and AMD's fork) do support it. ### Install HIPSTDPAR is currently packaged along rocThrust. The `hipstdpar` package is set up as a virtual package provided by `rocthrust`, so the latter needs to be installed entirely for getting HIPSTDPAR's headers. Conversely, installing the `rocthrust` package will also include HIPSTDPAR's headers in the system.
From ed35b2862459ed3bf017b8e07edafb66c0a0ed59 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Fri, 20 Sep 2024 10:25:05 +0000 Subject: [PATCH 40/44] Emit error when HIPSTDPAR macros are not defined --- thrust/system/hip/hipstdpar/hipstdpar_lib.hpp | 3 ++- thrust/system/hip/hipstdpar/impl/batch.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/copy.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/generation.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/heap.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/hipstd.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp | 7 ++++++- .../hip/hipstdpar/impl/lexicographical_comparison.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/merge.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/min_max.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/numeric.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/order_changing.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/partitioning.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/removing.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/search.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/set.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/sorting.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/swap.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/transformation.hpp | 2 ++ thrust/system/hip/hipstdpar/impl/uninitialized.hpp | 2 ++ 20 files changed, 44 insertions(+), 2 deletions(-) diff --git a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp index 20d0e9496..46e9a3d12 100644 --- a/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp +++ b/thrust/system/hip/hipstdpar/hipstdpar_lib.hpp @@ -21,8 +21,9 @@ #if defined(__HIPSTDPAR__) // Interposed allocations +#if defined(__HIPSTDPAR_INTERPOSE_ALLOC__) #include "impl/interpose_allocations.hpp" - +#endif // Parallel STL algorithms #include "impl/batch.hpp" #include "impl/copy.hpp" diff --git a/thrust/system/hip/hipstdpar/impl/batch.hpp b/thrust/system/hip/hipstdpar/impl/batch.hpp index 103176bdd..1a598c76a 100644 --- 
a/thrust/system/hip/hipstdpar/impl/batch.hpp +++ b/thrust/system/hip/hipstdpar/impl/batch.hpp @@ -104,4 +104,6 @@ namespace std } // END FOR_EACH_N } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/copy.hpp b/thrust/system/hip/hipstdpar/impl/copy.hpp index d69df37ee..b3539b6e1 100644 --- a/thrust/system/hip/hipstdpar/impl/copy.hpp +++ b/thrust/system/hip/hipstdpar/impl/copy.hpp @@ -154,4 +154,6 @@ namespace std } // END MOVE } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/generation.hpp b/thrust/system/hip/hipstdpar/impl/generation.hpp index f87a01a88..0f4995870 100644 --- a/thrust/system/hip/hipstdpar/impl/generation.hpp +++ b/thrust/system/hip/hipstdpar/impl/generation.hpp @@ -158,4 +158,6 @@ namespace std } // END GENERATE_N } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/heap.hpp b/thrust/system/hip/hipstdpar/impl/heap.hpp index 4e3dc81af..d00f25a69 100644 --- a/thrust/system/hip/hipstdpar/impl/heap.hpp +++ b/thrust/system/hip/hipstdpar/impl/heap.hpp @@ -36,4 +36,6 @@ namespace std // TODO: UNIMPLEMENTED IN THRUST // END IS_HEAP_UNTIL } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/hipstd.hpp b/thrust/system/hip/hipstdpar/impl/hipstd.hpp index 079704a1f..661a05593 100644 --- a/thrust/system/hip/hipstdpar/impl/hipstd.hpp +++ b/thrust/system/hip/hipstdpar/impl/hipstd.hpp @@ -83,6 +83,8 @@ inline constexpr { } } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." 
#endif // __HIPSTDPAR__ #endif // THRUST_SYSTEM_HIP_HIPSTDPAR_HIPSTD_HPP diff --git a/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp b/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp index 46afd0ca8..193663c75 100644 --- a/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp +++ b/thrust/system/hip/hipstdpar/impl/interpose_allocations.hpp @@ -212,6 +212,11 @@ __hipstdpar_operator_delete_sized(void* p, std::size_t n) noexcept { return __hipstdpar_operator_delete_aligned_sized(p, n, alignof(std::max_align_t)); } -#endif // __HIPSTDPAR_INTERPOSE_ALLOC__ +# else // __HIPSTDPAR_INTERPOSE_ALLOC__ +# error "__HIPSTDPAR_INTERPOSE_ALLOC__ should be defined. Please use the '--hipstdpar-interpose-alloc' compile option." +# endif // __HIPSTDPAR_INTERPOSE_ALLOC__ + +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp index f320c2d63..d44840962 100644 --- a/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp +++ b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp @@ -135,4 +135,6 @@ namespace std } // END LEXICOGRAPHICAL_COMPARE } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/merge.hpp b/thrust/system/hip/hipstdpar/impl/merge.hpp index bdae50ca8..38d17fafe 100644 --- a/thrust/system/hip/hipstdpar/impl/merge.hpp +++ b/thrust/system/hip/hipstdpar/impl/merge.hpp @@ -132,4 +132,6 @@ namespace std // TODO: UNIMPLEMENTED IN THRUST // END INPLACE_MERGE } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." 
#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/min_max.hpp b/thrust/system/hip/hipstdpar/impl/min_max.hpp index 2fcdd6744..341175035 100644 --- a/thrust/system/hip/hipstdpar/impl/min_max.hpp +++ b/thrust/system/hip/hipstdpar/impl/min_max.hpp @@ -210,4 +210,6 @@ namespace std } // END MINMAX_ELEMENT } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/numeric.hpp b/thrust/system/hip/hipstdpar/impl/numeric.hpp index c887a25a6..61541fe55 100644 --- a/thrust/system/hip/hipstdpar/impl/numeric.hpp +++ b/thrust/system/hip/hipstdpar/impl/numeric.hpp @@ -792,4 +792,6 @@ namespace std } // END TRANSFORM_INCLUSIVE_SCAN } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/order_changing.hpp b/thrust/system/hip/hipstdpar/impl/order_changing.hpp index 2ad9b559e..3fc020a5f 100644 --- a/thrust/system/hip/hipstdpar/impl/order_changing.hpp +++ b/thrust/system/hip/hipstdpar/impl/order_changing.hpp @@ -98,4 +98,6 @@ namespace std // TODO: UNIMPLEMENTED IN THRUST // END SHIFT_RIGHT } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/partitioning.hpp b/thrust/system/hip/hipstdpar/impl/partitioning.hpp index c4e74989b..dbd8b490f 100644 --- a/thrust/system/hip/hipstdpar/impl/partitioning.hpp +++ b/thrust/system/hip/hipstdpar/impl/partitioning.hpp @@ -198,4 +198,6 @@ namespace std } // END STABLE_PARTITION } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." 
#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/removing.hpp b/thrust/system/hip/hipstdpar/impl/removing.hpp index 0427b3f90..6fd210494 100644 --- a/thrust/system/hip/hipstdpar/impl/removing.hpp +++ b/thrust/system/hip/hipstdpar/impl/removing.hpp @@ -292,4 +292,6 @@ namespace std } // END UNIQUE_COPY } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/search.hpp b/thrust/system/hip/hipstdpar/impl/search.hpp index 10383ac4d..b2b133394 100644 --- a/thrust/system/hip/hipstdpar/impl/search.hpp +++ b/thrust/system/hip/hipstdpar/impl/search.hpp @@ -680,4 +680,6 @@ namespace std // TODO: UNIMPLEMENTED IN THRUST // END SEARCH_N } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/set.hpp b/thrust/system/hip/hipstdpar/impl/set.hpp index f28980361..3dda4bd21 100644 --- a/thrust/system/hip/hipstdpar/impl/set.hpp +++ b/thrust/system/hip/hipstdpar/impl/set.hpp @@ -511,4 +511,6 @@ namespace std } // END SET_SYMMETRIC_DIFFERENCE } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/sorting.hpp b/thrust/system/hip/hipstdpar/impl/sorting.hpp index 4cc5b3728..d992198a2 100644 --- a/thrust/system/hip/hipstdpar/impl/sorting.hpp +++ b/thrust/system/hip/hipstdpar/impl/sorting.hpp @@ -556,4 +556,6 @@ namespace std } // END NTH_ELEMENT } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." 
#endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/swap.hpp b/thrust/system/hip/hipstdpar/impl/swap.hpp index a9bd8d5dc..38b7aec06 100644 --- a/thrust/system/hip/hipstdpar/impl/swap.hpp +++ b/thrust/system/hip/hipstdpar/impl/swap.hpp @@ -60,4 +60,6 @@ namespace std } // END SWAP_RANGES } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/transformation.hpp b/thrust/system/hip/hipstdpar/impl/transformation.hpp index 7525f41c2..34b639775 100644 --- a/thrust/system/hip/hipstdpar/impl/transformation.hpp +++ b/thrust/system/hip/hipstdpar/impl/transformation.hpp @@ -293,4 +293,6 @@ namespace std } // END REPLACE_COPY_IF } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." #endif // __HIPSTDPAR__ diff --git a/thrust/system/hip/hipstdpar/impl/uninitialized.hpp b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp index e0e5f1d82..aee25d3fc 100644 --- a/thrust/system/hip/hipstdpar/impl/uninitialized.hpp +++ b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp @@ -386,4 +386,6 @@ namespace std } // END DESTROY_N } +#else // __HIPSTDPAR__ +# error "__HIPSTDPAR__ should be defined. Please use the '--hipstdpar' compile option." 
#endif // __HIPSTDPAR__ From 5023555cec5c3daf9b9bed2f840987124b5e1f11 Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Fri, 20 Sep 2024 11:50:07 +0000 Subject: [PATCH 41/44] Move forwarding calls to rocPRIM to thrust's stubs --- thrust/system/hip/hipstdpar/impl/sorting.hpp | 228 +++++++++++-------- 1 file changed, 131 insertions(+), 97 deletions(-) diff --git a/thrust/system/hip/hipstdpar/impl/sorting.hpp b/thrust/system/hip/hipstdpar/impl/sorting.hpp index d992198a2..6a71c1af2 100644 --- a/thrust/system/hip/hipstdpar/impl/sorting.hpp +++ b/thrust/system/hip/hipstdpar/impl/sorting.hpp @@ -31,6 +31,134 @@ #include #include +// rocThrust is currently missing some API entries, forward calls to rocPRIM until they are added. +namespace thrust +{ +// BEGIN PARTIAL_SORT +template () + && hipstd::is_offloadable_callable()>* = nullptr> +inline void __partial_sort(thrust::hip_rocprim::par_t policy, + KeysIt first, + KeysIt middle, + KeysIt last, + CompareOp compare_op) +{ + const size_t count = static_cast(thrust::distance(first, last)); + const size_t n = static_cast(thrust::distance(first, middle)); + + if (count == 0 || n == 0) + { + return; + } + + const size_t n_index = n - 1; + + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::partial_sort(nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp(policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::partial_sort(ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error( + thrust::hip_rocprim::synchronize_optional(policy), "partial_sort: failed to synchronize"); +} +// END PARTIAL_SORT + +// BEGIN PARTIAL_SORT_COPY +template () + && hipstd::is_offloadable_callable()>* = nullptr> +inline void __partial_sort_copy(thrust::hip_rocprim::par_t policy, + ForwardIt first, + ForwardIt last, + RandomIt d_first, + RandomIt d_last, + CompareOp compare_op) +{ + const size_t count = static_cast(thrust::distance(first, last)); + const size_t d_count = static_cast(thrust::distance(d_first, d_last)); + + if (count == 0 || d_count == 0) + { + return; + } + + const size_t d_index = d_count - 1; + + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = + rocprim::partial_sort_copy(nullptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); + + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp(policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = + rocprim::partial_sort_copy(ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error( + thrust::hip_rocprim::synchronize_optional(policy), "partial_sort_copy: failed to synchronize"); +} +// END PARTIAL_SORT_COPY + +// BEGIN NTH_ELEMENT +template () + && hipstd::is_offloadable_callable()>* = nullptr> +inline void __nth_element(thrust::hip_rocprim::par_t policy, + KeysIt first, + KeysIt nth, + KeysIt last, + CompareOp compare_op) +{ + const size_t count = static_cast(thrust::distance(first, last)); + const size_t n = static_cast(thrust::distance(first, nth)); + + if (count == 0) + { + return; + } + + size_t storage_size = 0; + hipStream_t stream = thrust::hip_rocprim::stream(policy); + bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; + + hipError_t status; + + status = rocprim::nth_element(nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); + // Allocate temporary storage. 
+ thrust::detail::temporary_array tmp(policy, storage_size); + void* ptr = static_cast(tmp.data().get()); + + status = rocprim::nth_element(ptr, storage_size, first, n, count, compare_op, stream, debug_sync); + thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); + thrust::hip_rocprim::throw_on_error( + thrust::hip_rocprim::synchronize_optional(policy), "nth_element: failed to synchronize"); +} +// END NTH_ELEMENT +} + namespace std { // BEGIN SORT @@ -179,37 +307,7 @@ namespace std KeysIt last, CompareOp compare_op) { - const size_t count = static_cast(thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, middle)); - - if(count == 0 || n == 0) - { - return; - } - - const size_t n_index = n - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort( - nullptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 1st step"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort( - ptr, storage_size, first, n_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort: failed to synchronize"); + ::thrust::__partial_sort(::thrust::device, first, middle, last, compare_op); } template (thrust::distance(first, last)); - const size_t d_count = static_cast(thrust::distance(d_first, d_last)); - - if(count == 0 || d_count == 0) - { - return; - } - - const size_t d_index = d_count - 1; - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::partial_sort_copy(nullptr, - storage_size, - first, - d_first, - d_index, - count, - compare_op, - stream, - debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 1st step"); - - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::partial_sort_copy( - ptr, storage_size, first, d_first, d_index, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "partial_sort_copy: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "partial_sort_copy: failed to synchronize"); + ::thrust::__partial_sort_copy(::thrust::device, first, last, d_first, d_last, compare_op); } template (thrust::distance(first, last)); - const size_t n = static_cast(thrust::distance(first, nth)); - - if(count == 0) - { - return; - } - - auto policy = thrust::device; - size_t storage_size = 0; - hipStream_t stream = thrust::hip_rocprim::stream(policy); - bool debug_sync = THRUST_HIP_DEBUG_SYNC_FLAG; - - hipError_t status; - - status = rocprim::nth_element( - nullptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 1st step"); - // Allocate temporary storage. 
- thrust::detail::temporary_array tmp( - policy, storage_size); - void* ptr = static_cast(tmp.data().get()); - - status = rocprim::nth_element( - ptr, storage_size, first, n, count, compare_op, stream, debug_sync); - thrust::hip_rocprim::throw_on_error(status, "nth_element: failed on 2nd step"); - thrust::hip_rocprim::throw_on_error(thrust::hip_rocprim::synchronize_optional(policy), - "nth_element: failed to synchronize"); + ::thrust::__nth_element(::thrust::device, first, nth, last, compare_op); } template Date: Mon, 4 Nov 2024 14:14:22 +0000 Subject: [PATCH 42/44] Fix path to hipstdpar impl headers --- thrust/system/hip/hipstdpar/impl/batch.hpp | 2 +- thrust/system/hip/hipstdpar/impl/generation.hpp | 2 +- thrust/system/hip/hipstdpar/impl/heap.hpp | 2 +- thrust/system/hip/hipstdpar/impl/hipstd.hpp | 2 +- thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp | 2 +- thrust/system/hip/hipstdpar/impl/merge.hpp | 2 +- thrust/system/hip/hipstdpar/impl/min_max.hpp | 2 +- thrust/system/hip/hipstdpar/impl/numeric.hpp | 2 +- thrust/system/hip/hipstdpar/impl/order_changing.hpp | 2 +- thrust/system/hip/hipstdpar/impl/partitioning.hpp | 2 +- thrust/system/hip/hipstdpar/impl/removing.hpp | 2 +- thrust/system/hip/hipstdpar/impl/search.hpp | 2 +- thrust/system/hip/hipstdpar/impl/set.hpp | 2 +- thrust/system/hip/hipstdpar/impl/sorting.hpp | 2 +- thrust/system/hip/hipstdpar/impl/swap.hpp | 2 +- thrust/system/hip/hipstdpar/impl/transformation.hpp | 2 +- thrust/system/hip/hipstdpar/impl/uninitialized.hpp | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/thrust/system/hip/hipstdpar/impl/batch.hpp b/thrust/system/hip/hipstdpar/impl/batch.hpp index 1a598c76a..86f0b68d9 100644 --- a/thrust/system/hip/hipstdpar/impl/batch.hpp +++ b/thrust/system/hip/hipstdpar/impl/batch.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/batch.hpp +/*! 
\file thrust/system/hip/hipstdpar/impl/batch.hpp * \brief Batch operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/generation.hpp b/thrust/system/hip/hipstdpar/impl/generation.hpp index 0f4995870..67f96a767 100644 --- a/thrust/system/hip/hipstdpar/impl/generation.hpp +++ b/thrust/system/hip/hipstdpar/impl/generation.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/generation.hpp +/*! \file thrust/system/hip/hipstdpar/impl/generation.hpp * \brief Generation operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/heap.hpp b/thrust/system/hip/hipstdpar/impl/heap.hpp index d00f25a69..d27e0e5a5 100644 --- a/thrust/system/hip/hipstdpar/impl/heap.hpp +++ b/thrust/system/hip/hipstdpar/impl/heap.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/heap.hpp +/*! \file thrust/system/hip/hipstdpar/impl/heap.hpp * \brief Heap operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/hipstd.hpp b/thrust/system/hip/hipstdpar/impl/hipstd.hpp index 661a05593..678eeea82 100644 --- a/thrust/system/hip/hipstdpar/impl/hipstd.hpp +++ b/thrust/system/hip/hipstdpar/impl/hipstd.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/hipstd.hpp +/*! \file thrust/system/hip/hipstdpar/impl/hipstd.hpp * \brief hipstd utilities implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp index d44840962..5311b1a86 100644 --- a/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp +++ b/thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! 
\file thrust/system/hip/hipstdpar/include/lexicographical_comparison.hpp +/*! \file thrust/system/hip/hipstdpar/impl/lexicographical_comparison.hpp * \brief Lexicographical comparison operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/merge.hpp b/thrust/system/hip/hipstdpar/impl/merge.hpp index 38d17fafe..b3cb654b1 100644 --- a/thrust/system/hip/hipstdpar/impl/merge.hpp +++ b/thrust/system/hip/hipstdpar/impl/merge.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/merge.hpp +/*! \file thrust/system/hip/hipstdpar/impl/merge.hpp * \brief Merge operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/min_max.hpp b/thrust/system/hip/hipstdpar/impl/min_max.hpp index 341175035..939623468 100644 --- a/thrust/system/hip/hipstdpar/impl/min_max.hpp +++ b/thrust/system/hip/hipstdpar/impl/min_max.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/min_max.hpp +/*! \file thrust/system/hip/hipstdpar/impl/min_max.hpp * \brief Minimum/maximum operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/numeric.hpp b/thrust/system/hip/hipstdpar/impl/numeric.hpp index 61541fe55..32c83bafb 100644 --- a/thrust/system/hip/hipstdpar/impl/numeric.hpp +++ b/thrust/system/hip/hipstdpar/impl/numeric.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/numeric.hpp +/*! \file thrust/system/hip/hipstdpar/impl/numeric.hpp * \brief Numeric operations implementation detail header for HIPSTDPAR. 
*/ diff --git a/thrust/system/hip/hipstdpar/impl/order_changing.hpp b/thrust/system/hip/hipstdpar/impl/order_changing.hpp index 3fc020a5f..1169650cc 100644 --- a/thrust/system/hip/hipstdpar/impl/order_changing.hpp +++ b/thrust/system/hip/hipstdpar/impl/order_changing.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/order_changing.hpp +/*! \file thrust/system/hip/hipstdpar/impl/order_changing.hpp * \brief Order-changing operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/partitioning.hpp b/thrust/system/hip/hipstdpar/impl/partitioning.hpp index dbd8b490f..fa1af3609 100644 --- a/thrust/system/hip/hipstdpar/impl/partitioning.hpp +++ b/thrust/system/hip/hipstdpar/impl/partitioning.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/partitioning.hpp +/*! \file thrust/system/hip/hipstdpar/impl/partitioning.hpp * \brief Partitioning operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/removing.hpp b/thrust/system/hip/hipstdpar/impl/removing.hpp index 6fd210494..30bb9a7c5 100644 --- a/thrust/system/hip/hipstdpar/impl/removing.hpp +++ b/thrust/system/hip/hipstdpar/impl/removing.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/removing.hpp +/*! \file thrust/system/hip/hipstdpar/impl/removing.hpp * \brief Removing operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/search.hpp b/thrust/system/hip/hipstdpar/impl/search.hpp index b2b133394..e2dfe0105 100644 --- a/thrust/system/hip/hipstdpar/impl/search.hpp +++ b/thrust/system/hip/hipstdpar/impl/search.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/search.hpp +/*! 
\file thrust/system/hip/hipstdpar/impl/search.hpp * \brief Search operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/set.hpp b/thrust/system/hip/hipstdpar/impl/set.hpp index 3dda4bd21..9cdf21e19 100644 --- a/thrust/system/hip/hipstdpar/impl/set.hpp +++ b/thrust/system/hip/hipstdpar/impl/set.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/set.hpp +/*! \file thrust/system/hip/hipstdpar/impl/set.hpp * \brief Set operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/sorting.hpp b/thrust/system/hip/hipstdpar/impl/sorting.hpp index 6a71c1af2..53f7953ad 100644 --- a/thrust/system/hip/hipstdpar/impl/sorting.hpp +++ b/thrust/system/hip/hipstdpar/impl/sorting.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/sorting.hpp +/*! \file thrust/system/hip/hipstdpar/impl/sorting.hpp * \brief Sorting operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/swap.hpp b/thrust/system/hip/hipstdpar/impl/swap.hpp index 38b7aec06..1571ca96a 100644 --- a/thrust/system/hip/hipstdpar/impl/swap.hpp +++ b/thrust/system/hip/hipstdpar/impl/swap.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/swap.hpp +/*! \file thrust/system/hip/hipstdpar/impl/swap.hpp * \brief Swap operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/transformation.hpp b/thrust/system/hip/hipstdpar/impl/transformation.hpp index 34b639775..9e9293bf9 100644 --- a/thrust/system/hip/hipstdpar/impl/transformation.hpp +++ b/thrust/system/hip/hipstdpar/impl/transformation.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/transformation.hpp +/*! 
\file thrust/system/hip/hipstdpar/impl/transformation.hpp * \brief Transformation operations implementation detail header for HIPSTDPAR. */ diff --git a/thrust/system/hip/hipstdpar/impl/uninitialized.hpp b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp index aee25d3fc..ec9be0127 100644 --- a/thrust/system/hip/hipstdpar/impl/uninitialized.hpp +++ b/thrust/system/hip/hipstdpar/impl/uninitialized.hpp @@ -12,7 +12,7 @@ * limitations under the License. */ -/*! \file thrust/system/hip/hipstdpar/include/uninitialized.hpp +/*! \file thrust/system/hip/hipstdpar/impl/uninitialized.hpp * \brief Operations on unitialized memory implementation detail header for HIPSTDPAR. */ From 1f060d8a46b4ff6e2f8acab5baab7b569b74daec Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Mon, 4 Nov 2024 14:41:34 +0000 Subject: [PATCH 43/44] Prevent building hipstdpar tests when no compatible libstdc++ is present --- README.md | 1 + test/CMakeLists.txt | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index adad3247c..c6d1e1409 100644 --- a/README.md +++ b/README.md @@ -299,6 +299,7 @@ ctest --output-on-failure #### Requirements * [rocPRIM](https://github.com/ROCm/rocPRIM) and [rocThrust](https://github.com/ROCm/rocThrust) libraries * [TBB](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onetbb.html) library + * Notice that oneTBB (oneAPI TBB) may fail to compile when libstdc++-9 or -10 is used, due to them using legacy TBB interfaces that are incompatible with the oneTBB ones (see the [release notes](https://www.intel.com/content/www/us/en/developer/articles/release-notes/intel-oneapi-threading-building-blocks-release-notes.html)). 
* CMake (3.10.2 or later) ## Support diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7f038ced0..1bd8a8bdd 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -252,7 +252,14 @@ if(BUILD_TEST OR BUILD_HIPSTDPAR_TEST) "Not building hipstdpar tests, currently they do not support Windows." ) else() - add_subdirectory(hipstdpar) + if ((CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0.0) OR NOT CMAKE_C_COMPILER_ID MATCHES GNU) + add_subdirectory(hipstdpar) + else() + message( + STATUS + "Not building hipstdpar tests, libstdc++-11 or greater is required." + ) + endif() endif() endif() From 5fdf870d638ee8075b0167e714f614a88417815a Mon Sep 17 00:00:00 2001 From: Beatriz Navidad Vilches Date: Wed, 6 Nov 2024 08:36:25 +0000 Subject: [PATCH 44/44] Disable TBB tests build --- test/hipstdpar/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/hipstdpar/CMakeLists.txt b/test/hipstdpar/CMakeLists.txt index 3fce6ecba..c93ca7222 100644 --- a/test/hipstdpar/CMakeLists.txt +++ b/test/hipstdpar/CMakeLists.txt @@ -84,6 +84,9 @@ if(NOT TARGET TBB::tbb AND NOT TARGET tbb) GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git GIT_TAG 1c4c93fc5398c4a1acb3492c02db4699f3048dea # v2021.13.0 ) + # Disable tests for TBB + set(TBB_TEST OFF CACHE BOOL "Disable TBB tests" FORCE) + FetchContent_MakeAvailable(thread-building-blocks) else() find_package(TBB REQUIRED)