Skip to content

Commit

Permalink
Merge pull request #43 from ROCmSoftwarePlatform/develop_stream
Browse files Browse the repository at this point in the history
develop_stream to develop
  • Loading branch information
ex-rzr authored Feb 6, 2019
2 parents b714afb + 353a794 commit bf19e05
Show file tree
Hide file tree
Showing 45 changed files with 803 additions and 596 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,13 @@ set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PR
if(HIP_PLATFORM STREQUAL "hcc")
rocm_create_package(
NAME rocprim
DESCRIPTION "Radeon Open Compute Parallel Primitives Libary"
DESCRIPTION "Radeon Open Compute Parallel Primitives Library"
MAINTAINER "Stream HPC Maintainers <[email protected]>"
)
else()
rocm_create_package(
NAME rocprim-hipcub
DESCRIPTION "Radeon Open Compute Parallel Primitives Libary (hipCUB only)"
DESCRIPTION "Radeon Open Compute Parallel Primitives Library (hipCUB only)"
MAINTAINER "Stream HPC Maintainers <[email protected]>"
)
endif()
2 changes: 1 addition & 1 deletion benchmark/benchmark_hc_block_histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include "benchmark_utils.hpp"

// rocPRIM
#include <rocprim/block/block_histogram.hpp>
#include <rocprim/rocprim.hpp>

#ifndef DEFAULT_N
const size_t DEFAULT_N = 1024 * 1024 * 128;
Expand Down
2 changes: 2 additions & 0 deletions benchmark/benchmark_hc_block_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@
#include <chrono>
#include <vector>
#include <limits>
#include <codecvt>
#include <string>
#include <cstdio>
#include <cstdlib>
#include <locale>

// Google Benchmark
#include "benchmark/benchmark.h"
Expand Down
14 changes: 1 addition & 13 deletions benchmark/benchmark_hc_block_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,17 @@
#include <string>
#include <cstdio>
#include <cstdlib>
#include <locale>

// Google Benchmark
#include "benchmark/benchmark.h"
// CmdParser
#include "cmdparser.hpp"
#include "benchmark_utils.hpp"

// HIP API
#include <hip/hip_runtime.h>
#include <hip/hip_hcc.h>

// rocPRIM
#include <rocprim/rocprim.hpp>

#define HIP_CHECK(condition) \
{ \
hipError_t error = condition; \
if(error != hipSuccess){ \
std::cout << "HIP error: " << error << " line: " << __LINE__ << std::endl; \
exit(error); \
} \
}

#ifndef DEFAULT_N
const size_t DEFAULT_N = 1024 * 1024 * 128;
#endif
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hc_device_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
// HC API
#include <hcc/hc.hpp>

// rocPRIM HIP API
// rocPRIM
#include <rocprim/rocprim.hpp>

// CmdParser
Expand Down
11 changes: 1 addition & 10 deletions benchmark/benchmark_hc_warp_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,10 @@
// HC API
#include <hcc/hc.hpp>
// rocPRIM
#include <rocprim/warp/warp_scan.hpp>
#include <rocprim/rocprim.hpp>

#include "benchmark_utils.hpp"

#define HIP_CHECK(condition) \
{ \
hipError_t error = condition; \
if(error != hipSuccess){ \
std::cout << "HIP error: " << error << " line: " << __LINE__ << std::endl; \
exit(error); \
} \
}

#ifndef DEFAULT_N
const size_t DEFAULT_N = 1024 * 1024 * 32;
#endif
Expand Down
11 changes: 1 addition & 10 deletions benchmark/benchmark_hc_warp_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,10 @@
// HC API
#include <hcc/hc.hpp>
// rocPRIM
#include <rocprim/warp/warp_sort.hpp>
#include <rocprim/rocprim.hpp>

#include "benchmark_utils.hpp"

#define HIP_CHECK(condition) \
{ \
hipError_t error = condition; \
if(error != hipSuccess){ \
std::cout << "HIP error: " << error << " line: " << __LINE__ << std::endl; \
exit(error); \
} \
}

#ifndef DEFAULT_N
const size_t DEFAULT_N = 1024 * 1024 * 32;
#endif
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_block_histogram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include <hip/hip_hcc.h>

// rocPRIM
#include <rocprim/block/block_histogram.hpp>
#include <rocprim/rocprim.hpp>

#define HIP_CHECK(condition) \
{ \
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_block_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include <hip/hip_hcc.h>

// rocPRIM
#include <rocprim/block/block_reduce.hpp>
#include <rocprim/rocprim.hpp>

#define HIP_CHECK(condition) \
{ \
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_device_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include <hip/hip_runtime.h>

// rocPRIM HIP API
#include <rocprim/device/device_reduce_hip.hpp>
#include <rocprim/rocprim.hpp>

// CmdParser
#include "cmdparser.hpp"
Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_device_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <hip/hip_runtime.h>
#include <hip/hip_hcc.h>
// rocPRIM
#include <rocprim/device/device_scan_hip.hpp>
#include <rocprim/rocprim.hpp>

#include "benchmark_utils.hpp"

Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_warp_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <hip/hip_runtime.h>
#include <hip/hip_hcc.h>
// rocPRIM
#include <rocprim/warp/warp_scan.hpp>
#include <rocprim/rocprim.hpp>

#include "benchmark_utils.hpp"

Expand Down
2 changes: 1 addition & 1 deletion benchmark/benchmark_hip_warp_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#include <hip/hip_runtime.h>
#include <hip/hip_hcc.h>
// rocPRIM
#include <rocprim/warp/warp_sort.hpp>
#include <rocprim/rocprim.hpp>

#include "benchmark_utils.hpp"

Expand Down
2 changes: 1 addition & 1 deletion cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ if(BUILD_TEST)
download_project(
PROJ googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG master
GIT_TAG release-1.8.1
INSTALL_DIR ${GTEST_ROOT}
CMAKE_ARGS -DBUILD_GTEST=ON -DINSTALL_GTEST=ON -Dgtest_force_shared_crt=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
LOG_DOWNLOAD TRUE
Expand Down
4 changes: 2 additions & 2 deletions hipcub/include/hipcub/config.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the >Software>), to deal
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
Expand All @@ -10,7 +10,7 @@
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED >AS IS>, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
Expand Down
16 changes: 8 additions & 8 deletions rocprim/include/rocprim/block/block_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,8 @@ class block_scan
/// The signature of the \p prefix_callback_op should be equivalent to the following:
/// <tt>T f(const T &block_reduction);</tt>. The signature does not need to have
/// <tt>const &</tt>, but function object must not modify the objects passed to it.
/// The object will be called by the first thread in the first warp from the block with
/// block reduction of \p input values as input argument. The result will be used as the
/// The object will be called by the first warp of the block with block reduction of
/// \p input values as input argument. The result of the first thread will be used as the
/// block-wide prefix.
/// \param [in] scan_op - binary operation function object that will be used for scan.
/// The signature of the function should be equivalent to the following:
Expand Down Expand Up @@ -794,8 +794,8 @@ class block_scan
/// The signature of the \p prefix_callback_op should be equivalent to the following:
/// <tt>T f(const T &block_reduction);</tt>. The signature does not need to have
/// <tt>const &</tt>, but function object must not modify the objects passed to it.
/// The object will be called by the first thread in the first warp from the block with
/// block reduction of \p input values as input argument. The result will be used as the
/// The object will be called by the first warp of the block with block reduction of
/// \p input values as input argument. The result of the first thread will be used as the
/// block-wide prefix.
/// \param [in] scan_op - binary operation function object that will be used for scan.
/// The signature of the function should be equivalent to the following:
Expand Down Expand Up @@ -1168,8 +1168,8 @@ class block_scan
/// The signature of the \p prefix_callback_op should be equivalent to the following:
/// <tt>T f(const T &block_reduction);</tt>. The signature does not need to have
/// <tt>const &</tt>, but function object must not modify the objects passed to it.
/// The object will be called by the first thread in the first warp from the block with
/// block reduction of \p input values as input argument. The result will be used as the
/// The object will be called by the first warp of the block with block reduction of
/// \p input values as input argument. The result of the first thread will be used as the
/// block-wide prefix.
/// \param [in] scan_op - binary operation function object that will be used for scan.
/// The signature of the function should be equivalent to the following:
Expand Down Expand Up @@ -1579,8 +1579,8 @@ class block_scan
/// The signature of the \p prefix_callback_op should be equivalent to the following:
/// <tt>T f(const T &block_reduction);</tt>. The signature does not need to have
/// <tt>const &</tt>, but function object must not modify the objects passed to it.
/// The object will be called by the first thread in the first warp from the block with
/// block reduction of \p input values as input argument. The result will be used as the
/// The object will be called by the first warp of the block with block reduction of
/// \p input values as input argument. The result of the first thread will be used as the
/// block-wide prefix.
/// \param [in] scan_op - binary operation function object that will be used for scan.
/// The signature of the function should be equivalent to the following:
Expand Down
15 changes: 10 additions & 5 deletions rocprim/include/rocprim/block/detail/block_sort_bitonic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ class block_sort_bitonic
copy_to_shared(kv..., flat_tid, storage);

bool is_even = (flat_tid % 2) == 0;
unsigned int odd_id = (is_even) ? std::max(flat_tid, (unsigned int) 1) - 1 : std::min(flat_tid + 1, Size - 1);
unsigned int even_id = (is_even) ? std::min(flat_tid + 1, Size - 1) : std::max(flat_tid, (unsigned int) 1) - 1;
unsigned int odd_id = (is_even) ? ::rocprim::max(flat_tid, 1u) - 1 : ::rocprim::min(flat_tid + 1, Size - 1);
unsigned int even_id = (is_even) ? ::rocprim::min(flat_tid + 1, Size - 1) : ::rocprim::max(flat_tid, 1u) - 1;

#pragma unroll
for(unsigned int length = 0; length < Size; length++)
Expand Down Expand Up @@ -331,13 +331,18 @@ class block_sort_bitonic
copy_to_shared(kv..., flat_tid, storage);

bool is_even = (flat_tid % 2 == 0);
unsigned int odd_id = (is_even) ? std::max(flat_tid, (unsigned int) 1) - 1 : std::min(flat_tid + 1, size - 1);
unsigned int even_id = (is_even) ? std::min(flat_tid + 1, size - 1) : std::max(flat_tid, (unsigned int) 1) - 1;
unsigned int odd_id = (is_even) ? ::rocprim::max(flat_tid, 1u) - 1 : ::rocprim::min(flat_tid + 1, size - 1);
unsigned int even_id = (is_even) ? ::rocprim::min(flat_tid + 1, size - 1) : ::rocprim::max(flat_tid, 1u) - 1;

for(unsigned int length = 0; length < size; length++)
{
unsigned int next_id = (length % 2 == 0) ? even_id : odd_id;
swap(kv..., flat_tid, next_id, 0, storage, compare_function);
// Use only "valid" keys to ensure that compare_function will not use garbage keys
// for example, as indices of an array (a lookup table)
if(flat_tid < size)
{
swap(kv..., flat_tid, next_id, 0, storage, compare_function);
}
::rocprim::syncthreads();
copy_to_shared(kv..., flat_tid, storage);
}
Expand Down
6 changes: 6 additions & 0 deletions rocprim/include/rocprim/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@
#define ROCPRIM_DETAIL_USE_DPP true
#endif

// Compile-time switch: ROCPRIM_DETAIL_USE_LOOKBACK_SCAN expands to true by
// default and to false when ROCPRIM_DISABLE_LOOKBACK_SCAN has been defined
// before this point.
#if defined(ROCPRIM_DISABLE_LOOKBACK_SCAN)
    #define ROCPRIM_DETAIL_USE_LOOKBACK_SCAN false
#else
    #define ROCPRIM_DETAIL_USE_LOOKBACK_SCAN true
#endif

// Defines targeted AMD architecture. Supported values:
// * 803 (gfx803)
// * 900 (gfx900)
Expand Down
22 changes: 20 additions & 2 deletions rocprim/include/rocprim/detail/radix_sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,33 @@ template<class Key, class Enable = void>
struct radix_key_codec_base
{
static_assert(sizeof(Key) == 0,
"Only integral (except bool) and floating point types supported as radix sort keys");
"Only integral and floating point types supported as radix sort keys");
};

template<class Key>
struct radix_key_codec_base<
Key,
typename std::enable_if<::rocprim::is_integral<Key>::value && !std::is_same<bool, Key>::value>::type
typename std::enable_if<::rocprim::is_integral<Key>::value>::type
> : radix_key_codec_integral<Key, typename std::make_unsigned<Key>::type> { };

// Key codec specialization for bool.
// A bool key is carried as an unsigned char: false maps to 0 and true maps to 1.
template<>
struct radix_key_codec_base<bool>
{
    // Unsigned type that holds the encoded form of the key
    using bit_key_type = unsigned char;

    // Converts a bool key to its bit representation (0 or 1)
    ROCPRIM_DEVICE inline
    static bit_key_type encode(bool key)
    {
        const bit_key_type encoded = key ? 1 : 0;
        return encoded;
    }

    // Converts a bit representation back to bool; any non-zero value yields true
    ROCPRIM_DEVICE inline
    static bool decode(bit_key_type bit_key)
    {
        return bit_key != 0;
    }
};

template<>
struct radix_key_codec_base<::rocprim::half> : radix_key_codec_floating<::rocprim::half, unsigned short> { };

Expand Down
23 changes: 19 additions & 4 deletions rocprim/include/rocprim/detail/various.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,15 @@ struct match_fundamental_type
template<class T>
ROCPRIM_DEVICE inline
auto store_volatile(T * output, T value)
-> typename std::enable_if<::rocprim::is_fundamental<T>::value>::type
-> typename std::enable_if<std::is_fundamental<T>::value>::type
{
*const_cast<volatile T*>(output) = value;
}

template<class T>
ROCPRIM_DEVICE inline
auto store_volatile(T * output, T value)
-> typename std::enable_if<!::rocprim::is_fundamental<T>::value>::type
-> typename std::enable_if<!std::is_fundamental<T>::value>::type
{
using fundamental_type = typename match_fundamental_type<T>::type;
constexpr unsigned int n = sizeof(T) / sizeof(fundamental_type);
Expand All @@ -183,7 +183,7 @@ auto store_volatile(T * output, T value)
template<class T>
ROCPRIM_DEVICE inline
auto load_volatile(T * input)
-> typename std::enable_if<::rocprim::is_fundamental<T>::value, T>::type
-> typename std::enable_if<std::is_fundamental<T>::value, T>::type
{
T retval = *const_cast<volatile T*>(input);
return retval;
Expand All @@ -192,7 +192,7 @@ auto load_volatile(T * input)
template<class T>
ROCPRIM_DEVICE inline
auto load_volatile(T * input)
-> typename std::enable_if<!::rocprim::is_fundamental<T>::value, T>::type
-> typename std::enable_if<!std::is_fundamental<T>::value, T>::type
{
using fundamental_type = typename match_fundamental_type<T>::type;
constexpr unsigned int n = sizeof(T) / sizeof(fundamental_type);
Expand Down Expand Up @@ -226,6 +226,21 @@ struct raw_storage
}
};

// Reports whether two iterators have the same type and the same value.
//
// Generic overload: selected when the two arguments have different types.
// The arguments are not inspected and the answer is always false.
template<class IteratorA, class IteratorB>
inline
bool are_iterators_equal(IteratorA /* first */, IteratorB /* second */)
{
    return false;
}

// Same-type overload: selected when both arguments share one type.
// The answer is whatever operator== yields for the two iterators.
template<class Iterator>
inline
bool are_iterators_equal(Iterator first, Iterator second)
{
    const bool same_value = (first == second);
    return same_value;
}

} // end namespace detail
END_ROCPRIM_NAMESPACE

Expand Down
Loading

0 comments on commit bf19e05

Please sign in to comment.