diff --git a/.gitmodules b/.gitmodules index 3fb7a9dd..9497040a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "librapid/vendor/CLBlast"] path = librapid/vendor/CLBlast url = https://github.com/CNugteren/CLBlast.git +[submodule "librapid/vendor/xsimd"] + path = librapid/vendor/xsimd + url = https://github.com/xtensor-stack/xsimd.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 2307cd45..30005daf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -415,19 +415,14 @@ endif () # Add dependencies add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/fmt") -add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc") - -if (NOT MINGW) - # scnlib does not support MinGW, since it does not implement std::from_chars, which is required by the library - add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib") -else () - message(WARNING "[ LIBRAPID ] scnlib cannot be built by MinGW, so it will not be enabled") - target_compile_definitions(${module_name} PUBLIC LIBRAPID_MINGW) -endif () +# add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/Vc") +add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/xsimd") +add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/librapid/vendor/scnlib") target_compile_definitions(fmt PUBLIC FMT_HEADER_ONLY) -target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY) -target_link_libraries(${module_name} PUBLIC fmt scn Vc) +# target_compile_definitions(Vc PRIVATE Vc_HACK_OSTREAM_FOR_TTY) +# target_link_libraries(${module_name} PUBLIC fmt scn Vc xsimd) +target_link_libraries(${module_name} PUBLIC fmt scn xsimd) if (${LIBRAPID_USE_MULTIPREC}) # Load MPIR @@ -484,15 +479,18 @@ if (LIBRAPID_FAST_MATH) target_compile_definitions(${module_name} PUBLIC LIBRAPID_FAST_MATH) endif () -set(LIBRAPID_ARCH_FLAGS) if (LIBRAPID_NATIVE_ARCH) message(STATUS "[ LIBRAPID ] Compiling for native architecture") - OptimizeForArchitecture() - target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS}) - target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH) - set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS}) - message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}") - message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}") + + include(ArchDetect2) + target_compile_options(${module_name} PUBLIC ${LIBRAPID_ARCH_FLAGS}) + +# OptimizeForArchitecture() +# target_compile_options(${module_name} PUBLIC ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS}) +# target_compile_definitions(${module_name} PUBLIC LIBRAPID_NATIVE_ARCH) +# set(LIBRAPID_ARCH_FLAGS ${Vc_DEFINITIONS} ${Vc_ARCHITECTURE_FLAGS}) +# message(STATUS "[ LIBRAPID ] Additional Definitions: ${Vc_DEFINITIONS}") +# message(STATUS "[ LIBRAPID ] Supported flags: ${Vc_ARCHITECTURE_FLAGS}") endif () # Add defines for CUDA vector widths diff --git a/cmake/ArchDetect2.cmake b/cmake/ArchDetect2.cmake new file mode 100644 index 00000000..f8bba755 --- /dev/null +++ b/cmake/ArchDetect2.cmake @@ -0,0 +1,243 @@ +INCLUDE(CheckCXXSourceRuns) + +set(COMPILER_GNU false) +set(COMPILER_INTEL false) +set(COMPILER_CLANG false) +set(COMPILER_MSVC false) + +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(COMPILER_GNU true) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + set(COMPILER_INTEL true) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set(COMPILER_CLANG true) +elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") + set(COMPILER_MSVC true) +else () + # Unknown Compiler +endif () + +set(LIBRAPID_ARCH_FLAGS) +set(LIBRAPID_ARCH_FOUND) + +# Function to test a given SIMD capability +function(check_simd_capability FLAG_GNU FLAG_MSVC NAME TEST_SOURCE VAR) + set(CMAKE_REQUIRED_FLAGS) + if (COMPILER_GNU OR COMPILER_INTEL OR COMPILER_CLANG) + set(CMAKE_REQUIRED_FLAGS "${FLAG_GNU}") + elseif (COMPILER_MSVC) # reserve for WINDOWS + set(CMAKE_REQUIRED_FLAGS "${FLAG_MSVC}") + endif () + + CHECK_CXX_SOURCE_RUNS("${TEST_SOURCE}" ${VAR}) + + if (${${VAR}}) + if (COMPILER_GNU OR COMPILER_INTEL OR COMPILER_CLANG) + # set(LIBRAPID_ARCH_FLAGS "${LIBRAPID_ARCH_FLAGS} ${FLAG_GNU}" PARENT_SCOPE) + + list(APPEND LIBRAPID_ARCH_FLAGS ${FLAG_GNU}) + set(LIBRAPID_ARCH_FLAGS ${LIBRAPID_ARCH_FLAGS} PARENT_SCOPE) + + message(STATUS "[ LIBRAPID ] ${NAME} found: ${FLAG_GNU}") + elseif (MSVC) + # set(LIBRAPID_ARCH_FLAGS "${LIBRAPID_ARCH_FLAGS} ${FLAG_MSVC}" PARENT_SCOPE) + + list(APPEND LIBRAPID_ARCH_FLAGS ${FLAG_MSVC}) + set(LIBRAPID_ARCH_FLAGS ${LIBRAPID_ARCH_FLAGS} PARENT_SCOPE) + + message(STATUS "[ LIBRAPID ] ${NAME} found: ${FLAG_MSVC}") + endif () + set(LIBRAPID_ARCH_FOUND TRUE PARENT_SCOPE) + else () + message(STATUS "[ LIBRAPID ] ${NAME} not found") + endif () +endfunction() + +# Check SSE2 (not a valid flag for MSVC) +check_simd_capability("-msse2" "" "SSE2" " +#include +int main() { + __m128i a = _mm_set_epi32 (-1, 2, -3, 4); + __m128i result = _mm_abs_epi32 (a); + return 0; +}" SIMD_SSE2) + +# Check SSE3 (not a valid flag for MSVC) +check_simd_capability("-msse3" "" "SSE3" " +#include +int main() { + __m128 a = _mm_set_ps (-1.0f, 2.0f, -3.0f, 4.0f); + __m128 b = _mm_set_ps (1.0f, 2.0f, 3.0f, 4.0f); + __m128 result = _mm_addsub_ps (a, b); + return 0; +}" SIMD_SSE3) + +# Check SSSE3 (not a valid flag for MSVC) +check_simd_capability("-mssse3" "" "SSSE3" " +#include +int main() { + __m128i a = _mm_set_epi8(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4); + __m128i result = _mm_abs_epi8(a); + return 0; +}" SIMD_SSSE3) + +# Check SSE4.1 (not a valid flag for MSVC) +check_simd_capability("-msse4.1" "" "SSE4.1" " +#include +int main() { + __m128i a = _mm_set_epi32(-1, 2, -3, 4); + __m128i result = _mm_abs_epi32(a); + return 0; +}" SIMD_SSE4_1) + +# Check SSE4.2 (not a valid flag for MSVC) +check_simd_capability("-msse4.2" "" "SSE4.2" " +#include +int main() { + __m128i a = _mm_set_epi32(-1, 2, -3, 4); + __m128i result = _mm_abs_epi32(a); + return 0; +}" SIMD_SSE4_2) + +# Check AVX +check_simd_capability("-mavx" "/arch:AVX" "AVX" " +#include +int main() { + __m256 a = _mm256_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); + __m256 result = _mm256_abs_ps(a); + return 0; +}" SIMD_AVX) + +# Check AVX2 +check_simd_capability("-mavx2" "/arch:AVX2" "AVX2" " +#include +int main() { + __m256i a = _mm256_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4); + __m256i result = _mm256_abs_epi32(a); + return 0; +}" SIMD_AVX2) + +# Check AVX512F +check_simd_capability("-mavx512f" "/arch:AVX512" "AVX512F" " +#include +int main() { + __m512i a = _mm512_set_epi32(-1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4, -1, 2, -3, 4); + __m512i result = _mm512_abs_epi32(a); + return 0; +}" SIMD_AVX512F) + +# Check AVX512BW +check_simd_capability("-mavx512bw" "/arch:AVX512" "AVX512BW" " +#include +int main() { + __m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4); + __m512i result = _mm512_abs_epi8(a); + return 0; +}" SIMD_AVX512BW) + +# Check AVX512CD +check_simd_capability("-mavx512cd" "/arch:AVX512" "AVX512CD" " +#include +int main() { + __m512i a = _mm512_set_epi64(-1, 2, -3, 4, -1, 2, -3, 4); + __m512i result = _mm512_conflict_epi64(a); + return 0; +}" SIMD_AVX512CD) + +# Check AVX512DQ +check_simd_capability("-mavx512dq" "/arch:AVX512" "AVX512DQ" " +#include +int main() { + __m512d a = _mm512_set_pd(-1.0, 2.0, -3.0, 4.0, -1.0, 2.0, -3.0, 4.0); + __m512d result = _mm512_abs_pd(a); + return 0; +}" SIMD_AVX512DQ) + +# Check AVX512ER +check_simd_capability("-mavx512er" "/arch:AVX512" "AVX512ER" " +#include +int main() { + __m512d a = _mm512_set_pd(-1.0, 2.0, -3.0, 4.0, -1.0, 2.0, -3.0, 4.0); + __m512d result = _mm512_exp_pd(a); + return 0; +}" SIMD_AVX512ER) + +# Check AVX512PF +check_simd_capability("-mavx512pf" "/arch:AVX512" "AVX512PF" " +#include +int main() { + __m512 a = _mm512_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); + __m512 result = _mm512_exp_ps(a); + return 0; +}" SIMD_AVX512PF) + +# ARM +check_simd_capability("-march=armv7-a" "" "ARMv7" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv7) + +check_simd_capability("-march=armv8-a" "" "ARMv8" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8) + +# ARM64 +check_simd_capability("-march=armv8.1-a" "" "ARMv8.1" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8_1) + +check_simd_capability("-march=armv8.2-a" "" "ARMv8.2" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8_2) + +check_simd_capability("-march=armv8.3-a" "" "ARMv8.3" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8_3) + +check_simd_capability("-march=armv8.4-a" "" "ARMv8.4" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8_4) + +check_simd_capability("-march=armv8.5-a" "" "ARMv8.5" " +#include +int main() { + int32x4_t a = vdupq_n_s32(1); + int32x4_t b = vdupq_n_s32(2); + int32x4_t result = vaddq_s32(a, b); + return 0; +}" SIMD_ARMv8_5) + +if (LIBRAPID_ARCH_FOUND) + message(STATUS "[ LIBRAPID ] Architecture Flags: ${LIBRAPID_ARCH_FLAGS}") +else() + message(STATUS "[ LIBRAPID ] Architecture Flags Not Found") +endif() diff --git a/librapid/include/librapid/array/arrayContainer.hpp b/librapid/include/librapid/array/arrayContainer.hpp index 26940510..358025c2 100644 --- a/librapid/include/librapid/array/arrayContainer.hpp +++ b/librapid/include/librapid/array/arrayContainer.hpp @@ -668,9 +668,7 @@ namespace librapid { template auto ArrayContainer::packet(size_t index) const -> Packet { - Packet res; - res.load(m_storage.begin() + index); - return res; + return xsimd::load_aligned(m_storage.begin() + index); } template @@ -681,7 +679,7 @@ namespace librapid { template void ArrayContainer::writePacket(size_t index, const Packet &value) { - value.store(m_storage.begin() + index); + value.store_aligned(m_storage.begin() + index); } template diff --git a/librapid/include/librapid/array/storage.hpp b/librapid/include/librapid/array/storage.hpp index fff1fc46..b81f6fbb 100644 --- a/librapid/include/librapid/array/storage.hpp +++ b/librapid/include/librapid/array/storage.hpp @@ -184,11 +184,14 @@ namespace librapid { /// \param newSize New size of the Storage object LIBRAPID_ALWAYS_INLINE void resizeImpl(SizeType newSize); -#if defined(LIBRAPID_NATIVE_ARCH) && !defined(LIBRAPID_APPLE) - alignas(LIBRAPID_DEFAULT_MEM_ALIGN) Pointer m_begin = nullptr; -#else - Pointer m_begin = nullptr; // Pointer to the beginning of the data -#endif +//#if defined(LIBRAPID_NATIVE_ARCH) && !defined(LIBRAPID_APPLE) +// alignas(LIBRAPID_DEFAULT_MEM_ALIGN) Pointer m_begin = nullptr; +//#else +// Pointer m_begin = nullptr; // Pointer to the beginning of the data +//#endif + + Pointer m_begin = nullptr; + SizeType m_size = 0; // Number of elements in the Storage object bool m_ownsData = true; // Whether this Storage object owns the data it points to }; diff --git a/librapid/include/librapid/core/config.hpp b/librapid/include/librapid/core/config.hpp index b1b3d178..55bef30e 100644 --- a/librapid/include/librapid/core/config.hpp +++ b/librapid/include/librapid/core/config.hpp @@ -165,62 +165,63 @@ # define LIBRAPID_AVX512 # define LIBRAPID_ARCH AVX512_2 # define LIBRAPID_ARCH_NAME "AVX512" -# define LIBRAPID_DEFAULT_MEM_ALIGN 64 +# define LIBRAPID_DEFAULT_MEM_ALIGN 256 #elif defined(__AVX512F__) || defined(__AVX512__) # define LIBRAPID_AVX512 # define LIBRAPID_ARCH AVX512 # define LIBRAPID_ARCH_NAME "AVX512" +# define LIBRAPID_DEFAULT_MEM_ALIGN 256 #elif defined(__AVX2__) # define LIBRAPID_AVX2 # define LIBRAPID_ARCH AVX2 # define LIBRAPID_ARCH_NAME "AVX2" -# define LIBRAPID_DEFAULT_MEM_ALIGN 32 +# define LIBRAPID_DEFAULT_MEM_ALIGN 128 #elif defined(__AVX__) # define LIBRAPID_AVX # define LIBRAPID_ARCH AVX # define LIBRAPID_ARCH_NAME "AVX" -# define LIBRAPID_DEFAULT_MEM_ALIGN 32 +# define LIBRAPID_DEFAULT_MEM_ALIGN 128 #elif defined(__SSE4_2__) # define LIBRAPID_SSE42 # define LIBRAPID_ARCH SSE4_2 # define LIBRAPID_ARCH_NAME "SSE4.2" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(__SSE4_1__) # define LIBRAPID_SSE41 # define LIBRAPID_ARCH SSE4_1 # define LIBRAPID_ARCH_NAME "SSE4.1" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(__SSSE3__) # define LIBRAPID_SSSE3 # define LIBRAPID_ARCH SSSE3 # define LIBRAPID_ARCH_NAME "SSSE3" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(__SSE3__) # define LIBRAPID_SSE3 # define LIBRAPID_ARCH SSE3 # define LIBRAPID_ARCH_NAME "SSE3" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(__SSE2__) || defined(__x86_64__) # define LIBRAPID_SSE2 # define LIBRAPID_ARCH SSE2 # define LIBRAPID_ARCH_NAME "SSE2" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(__SSE__) # define LIBRAPID_SSE # define LIBRAPID_ARCH SSE # define LIBRAPID_ARCH_NAME "SSE" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 #elif defined(_M_IX86_FP) // Defined in MS compiler. 1: SSE, 2: SSE2 # if _M_IX86_FP == 1 # define LIBRAPID_SSE # define LIBRAPID_ARCH SSE # define LIBRAPID_ARCH_NAME "SSE" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 # elif _M_IX86_FP == 2 # define LIBRAPID_SSE2 # define LIBRAPID_ARCH SSE2 # define LIBRAPID_ARCH_NAME "SSE2" -# define LIBRAPID_DEFAULT_MEM_ALIGN 16 +# define LIBRAPID_DEFAULT_MEM_ALIGN 64 # endif // _M_IX86_FP #else # define LIBRAPID_ARCH 0 diff --git a/librapid/include/librapid/core/librapidPch.hpp b/librapid/include/librapid/core/librapidPch.hpp index c552bcd2..d8457401 100644 --- a/librapid/include/librapid/core/librapidPch.hpp +++ b/librapid/include/librapid/core/librapidPch.hpp @@ -71,10 +71,12 @@ # pragma warning(disable : 4127) // conditional expression is constant #endif -#include -#include -#include -#include +// #include +// #include +// #include +// #include + +#include #if defined(_MSC_VER) # pragma warning(pop) diff --git a/librapid/include/librapid/core/traits.hpp b/librapid/include/librapid/core/traits.hpp index e60295b4..9d5125e1 100644 --- a/librapid/include/librapid/core/traits.hpp +++ b/librapid/include/librapid/core/traits.hpp @@ -211,9 +211,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = int8_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "int8_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -242,9 +242,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = uint8_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "uint8_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -273,9 +273,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = int16_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "int16_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -304,9 +304,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = uint16_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "uint16_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -335,9 +335,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = int32_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "int32_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -366,9 +366,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = uint32_t; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "uint32_t"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -459,9 +459,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = float; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "float"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -490,9 +490,9 @@ namespace librapid { struct TypeInfo { static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; using Scalar = double; - using Packet = Vc::Vector; + using Packet = xsimd::batch; using Backend = backend::CPU; - static constexpr int64_t packetWidth = Packet::size(); + static constexpr int64_t packetWidth = Packet::size; static constexpr char name[] = "double"; static constexpr bool supportsArithmetic = true; static constexpr bool supportsLogical = true; @@ -518,7 +518,7 @@ namespace librapid { }; template - struct TypeInfo> { + struct TypeInfo> { static constexpr detail::LibRapidType type = detail::LibRapidType::Vector; using Scalar = T; using Packet = std::false_type; @@ -736,32 +736,6 @@ namespace librapid { }; #endif - template - struct TypeInfo> { - static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; - using Scalar = typename VectorType::EntryType; - using Packet = std::false_type; - using Backend = backend::CPU; - static constexpr int64_t packetWidth = 1; - static constexpr char name[] = "Vc::ElementReference"; - static constexpr bool supportsArithmetic = true; - static constexpr bool supportsLogical = false; - static constexpr bool supportsBinary = false; - static constexpr bool allowVectorisation = false; - - static constexpr bool canAlign = true; - static constexpr bool canMemcpy = false; - - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } - }; - template<> struct TypeInfo { static constexpr char name[] = "CPU"; diff --git a/librapid/include/librapid/math/vectorImpl.hpp b/librapid/include/librapid/math/vectorImpl.hpp index cdf97417..9ba60f27 100644 --- a/librapid/include/librapid/math/vectorImpl.hpp +++ b/librapid/include/librapid/math/vectorImpl.hpp @@ -756,11 +756,11 @@ namespace librapid { return val; } - template - constexpr auto scalarExtractor(const Vc_1::Detail::ElementReference &val) { - using Scalar = typename Vc_1::Detail::ElementReference::value_type; - return static_cast(val); - } + // template + // constexpr auto scalarExtractor(const Vc_1::Detail::ElementReference &val) { + // using Scalar = typename Vc_1::Detail::ElementReference::value_type; + // return static_cast(val); + // } template constexpr auto scalarVectorCaster(const T &val) { diff --git a/librapid/include/librapid/simd/vecOps.hpp b/librapid/include/librapid/simd/vecOps.hpp index d830168f..1dd8199b 100644 --- a/librapid/include/librapid/simd/vecOps.hpp +++ b/librapid/include/librapid/simd/vecOps.hpp @@ -1,6 +1,8 @@ #ifndef LIBRAPID_SIMD_TRIGONOMETRY #define LIBRAPID_SIMD_TRIGONOMETRY +#if 0 + namespace librapid { namespace typetraits { template @@ -215,4 +217,6 @@ namespace librapid { } } // namespace librapid +#endif + #endif // LIBRAPID_SIMD_TRIGONOMETRY \ No newline at end of file diff --git a/librapid/vendor/xsimd b/librapid/vendor/xsimd new file mode 160000 index 00000000..e6fa5aca --- /dev/null +++ b/librapid/vendor/xsimd @@ -0,0 +1 @@ +Subproject commit e6fa5aca6320d6ccaf24c123ab2af9b0f2f09cc1