diff --git a/CMakeLists.txt b/CMakeLists.txt index ec760ca..adf4eca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.10) # Need at least 3.10 for gtest_discover_t project(blast VERSION 0.1 LANGUAGES CXX) # Enable modern C++ -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 23) # Allow for integration with other tools such as Intellisense set(CMAKE_EXPORT_COMPILE_COMMANDS ON) diff --git a/bench/blast/CMakeLists.txt b/bench/blast/CMakeLists.txt index 6d20751..3a46bb3 100644 --- a/bench/blast/CMakeLists.txt +++ b/bench/blast/CMakeLists.txt @@ -33,6 +33,7 @@ add_executable(bench-blast math/panel/DynamicGemm.cpp math/panel/StaticPotrf.cpp math/panel/DynamicPotrf.cpp + math/panel/StaticMatrixPointer.cpp ) target_compile_definitions(bench-blast diff --git a/bench/blast/math/panel/StaticMatrixPointer.cpp b/bench/blast/math/panel/StaticMatrixPointer.cpp new file mode 100644 index 0000000..3fb5f3a --- /dev/null +++ b/bench/blast/math/panel/StaticMatrixPointer.cpp @@ -0,0 +1,34 @@ +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include + + +namespace blast :: benchmark +{ + template + static void BM_static_panel_matrix_pointer(State& state) + { + size_t constexpr M = 8; + size_t constexpr N = 4; + + StaticPanelMatrix A; + auto pA = ptr(A, 0, 0); + + for (auto _ : state) + { + for (size_t i = 0; i < M; ++i) + for (size_t j = 0; j < N; ++j) + DoNotOptimize(pA(i, j)); + } + } + + + BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, aligned); + BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, unaligned); + BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, aligned); + BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, unaligned); +} diff --git a/bench/common/CMakeLists.txt b/bench/common/CMakeLists.txt index 030ae1e..ddc5c5a 100644 --- a/bench/common/CMakeLists.txt +++ b/bench/common/CMakeLists.txt @@ -16,6 +16,6 @@ target_link_libraries(bench-blast-common if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # More aggressive inlining with Clang target_compile_options(bench-blast-common - PUBLIC "-mllvm" "-inline-threshold=1000" + PUBLIC "-mllvm" "-inline-threshold=4000" ) endif() diff --git a/include/blast/blaze/math/TypeTraits.hpp b/include/blast/blaze/math/TypeTraits.hpp index 62b958e..ace3dee 100644 --- a/include/blast/blaze/math/TypeTraits.hpp +++ b/include/blast/blaze/math/TypeTraits.hpp @@ -9,5 +9,7 @@ #include #include #include +#include +#include #include #include diff --git a/include/blast/blaze/math/typetraits/IsAligned.hpp b/include/blast/blaze/math/typetraits/IsAligned.hpp new file mode 100644 index 0000000..d25505c --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsAligned.hpp @@ -0,0 +1,24 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze matrix and vector types + * + * @tparam T matrix or vector type + */ + template + requires blaze::IsVector_v || blaze::IsMatrix_v + struct IsAligned : blaze::IsAligned {}; +} diff --git a/include/blast/blaze/math/typetraits/IsPadded.hpp b/include/blast/blaze/math/typetraits/IsPadded.hpp new file mode 100644 index 0000000..1699406 --- /dev/null +++ b/include/blast/blaze/math/typetraits/IsPadded.hpp @@ -0,0 +1,24 @@ +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#pragma once + +#include + +#include +#include +#include + + +namespace blast +{ + /** + * @brief Specialization for Blaze matrix and vector types + * + * @tparam T matrix or vector type + */ + template + requires blaze::IsVector_v || blaze::IsMatrix_v + struct IsPadded : blaze::IsPadded {}; +} diff --git a/include/blast/math/DynamicPanelMatrix.hpp b/include/blast/math/DynamicPanelMatrix.hpp index 5f54073..a5ad8ff 100644 --- a/include/blast/math/DynamicPanelMatrix.hpp +++ b/include/blast/math/DynamicPanelMatrix.hpp @@ -329,8 +329,6 @@ namespace blaze , bool SO > void makePositiveDefinite( DynamicPanelMatrix& matrix ) { - using blaze::randomize; - BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE( Type ); if( !isSquare( *matrix ) ) { @@ -349,16 +347,13 @@ namespace blaze gemm(A, trans(A), matrix, matrix); - // TODO: implement it as below after the matrix *= ctrans( matrix ) expression works. - - // randomize( matrix ); - // matrix *= ctrans( matrix ); - - // for( size_t i=0UL; i& m ) + // I could not figure out what causes it, but we are going to decouple DynamicPanelMatrix from Blaze, + // so this code will be gone anyway. + // + // BLAZE_INTERNAL_ASSERT( isHermitian( matrix ), "Non-symmetric matrix detected" ); } /*! \endcond */ //************************************************************************************************* diff --git a/include/blast/math/RowColumnVectorPointer.hpp b/include/blast/math/RowColumnVectorPointer.hpp index ae7a8b5..d3d0caf 100644 --- a/include/blast/math/RowColumnVectorPointer.hpp +++ b/include/blast/math/RowColumnVectorPointer.hpp @@ -1,16 +1,6 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once @@ -78,12 +68,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return ptr_.broadcast(); - } - - void store(SimdVecType const& val) const noexcept { ptr_.store(transposeFlag, val); @@ -123,13 +107,15 @@ namespace blast /** - * @brief Get reference to the pointed value. + * @brief Access element at specified offset * - * @return reference to the pointed value + * @param i offset + * + * @return reference to the element at specified offset */ - ElementType& operator*() noexcept + ElementType& operator[](ptrdiff_t i) const noexcept { - return *ptr_; + return transposeFlag == columnVector ? ptr_[i, 0] : ptr_[0, i]; } diff --git a/include/blast/math/algorithm/Gemm.hpp b/include/blast/math/algorithm/Gemm.hpp index 108e327..5f5d21f 100644 --- a/include/blast/math/algorithm/Gemm.hpp +++ b/include/blast/math/algorithm/Gemm.hpp @@ -52,11 +52,11 @@ namespace blast M, N, [&] (auto& ker, size_t i, size_t j) { - gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j)); + gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j)); }, [&] (auto& ker, size_t i, size_t j, size_t m, size_t n) { - gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j), m, n); + gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j), m, n); } ); } diff --git a/include/blast/math/algorithm/Tile.hpp b/include/blast/math/algorithm/Tile.hpp index 3b4efef..b15f2b5 100644 --- a/include/blast/math/algorithm/Tile.hpp +++ b/include/blast/math/algorithm/Tile.hpp @@ -13,8 +13,7 @@ #endif #include - -#include +#include namespace blast @@ -47,7 +46,7 @@ namespace blast * @param f_partial functor to call on partial tiles */ template - inline void tile(Arch arch, StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial) + inline void tile(Arch arch, StorageOrder traversal_order, size_t m, size_t n, FF&& f_full, FP&& f_partial) { detail::tile(arch, traversal_order, m, n, f_full, f_partial); } diff --git a/include/blast/math/algorithm/arch/avx2/Tile.hpp b/include/blast/math/algorithm/arch/avx2/Tile.hpp index f7decfd..322dac2 100644 --- a/include/blast/math/algorithm/arch/avx2/Tile.hpp +++ b/include/blast/math/algorithm/arch/avx2/Tile.hpp @@ -10,8 +10,6 @@ #include -#include - namespace blast :: detail { diff --git a/include/blast/math/dense/DynamicMatrix.hpp b/include/blast/math/dense/DynamicMatrix.hpp index 0b6128d..ea90feb 100644 --- a/include/blast/math/dense/DynamicMatrix.hpp +++ b/include/blast/math/dense/DynamicMatrix.hpp @@ -203,4 +203,12 @@ namespace blast template struct StorageOrderHelper> : std::integral_constant {}; + + + template + struct IsAligned> : std::integral_constant {}; + + + template + struct IsPadded> : std::integral_constant {}; } diff --git a/include/blast/math/dense/DynamicMatrixPointer.hpp b/include/blast/math/dense/DynamicMatrixPointer.hpp index b440e12..d970a67 100644 --- a/include/blast/math/dense/DynamicMatrixPointer.hpp +++ b/include/blast/math/dense/DynamicMatrixPointer.hpp @@ -81,12 +81,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return *ptr_; - } - - void store(SimdVecType const& val) const noexcept { val.store(ptr_, AF); @@ -119,6 +113,20 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i row offset + * @param j column offset + * + * @return reference to the element at specified offset + */ + ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept + { + return *ptrOffset(i, j); + } + + /** * @brief Get reference to the pointed value. * @@ -210,6 +218,20 @@ namespace blast struct StorageOrderHelper> : std::integral_constant {}; + /** + * @brief Specialization for @a DynamicMatrixPointer + */ + template + struct IsAligned> : std::integral_constant {}; + + + /** + * @brief Specialization for @a DynamicMatrixPointer + */ + template + struct IsPadded> : std::integral_constant {}; + + template BLAST_ALWAYS_INLINE auto trans(DynamicMatrixPointer const& p) noexcept { diff --git a/include/blast/math/dense/DynamicVectorPointer.hpp b/include/blast/math/dense/DynamicVectorPointer.hpp index 6843925..9d3e576 100644 --- a/include/blast/math/dense/DynamicVectorPointer.hpp +++ b/include/blast/math/dense/DynamicVectorPointer.hpp @@ -73,12 +73,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return *ptr_; - } - - void store(IntrinsicType val) const noexcept { // Non-optimized @@ -120,6 +114,19 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i offset + * + * @return reference to the element at specified offset + */ + ElementType& operator[](ptrdiff_t i) const noexcept + { + return *ptrOffset(i); + } + + /** * @brief Get reference to the pointed value. * diff --git a/include/blast/math/dense/StaticMatrix.hpp b/include/blast/math/dense/StaticMatrix.hpp index c000483..1a68d0d 100644 --- a/include/blast/math/dense/StaticMatrix.hpp +++ b/include/blast/math/dense/StaticMatrix.hpp @@ -13,6 +13,7 @@ #include #include +#include namespace blast @@ -214,4 +215,12 @@ namespace blast template struct StorageOrderHelper> : std::integral_constant {}; + + + template + struct IsAligned> : std::integral_constant {}; + + + template + struct IsPadded> : std::integral_constant {}; } diff --git a/include/blast/math/dense/StaticMatrixPointer.hpp b/include/blast/math/dense/StaticMatrixPointer.hpp index bbdd9f9..1c346e7 100644 --- a/include/blast/math/dense/StaticMatrixPointer.hpp +++ b/include/blast/math/dense/StaticMatrixPointer.hpp @@ -42,7 +42,7 @@ namespace blast * */ constexpr StaticMatrixPointer(T * p00, ptrdiff_t i, ptrdiff_t j) noexcept - : ptr_ {p00 + (SO == columnMajor ? i + spacing() * j : spacing() * i + j)} + : ptr_ {ptrOffset(p00, i, j)} { BLAST_USER_ASSERT(!AF || isSimdAligned(ptr_), "Pointer is not aligned"); } @@ -82,12 +82,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return SimdVecType {*ptr_}; - } - - void store(SimdVecType const& val) const noexcept { val.store(ptr_, AF); @@ -114,6 +108,20 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i row offset + * @param j column offset + * + * @return reference to the element at specified offset + */ + ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept + { + return *ptrOffset(ptr_, i, j); + } + + /** * @brief Get reference to the pointed value. * @@ -186,10 +194,15 @@ namespace blast private: + static T * ptrOffset(T * p, ptrdiff_t i, ptrdiff_t j) noexcept + { + return p + (SO == columnMajor ? i + spacing() * j : spacing() * i + j); + } + + static size_t constexpr SS = SimdVecType::size(); static TransposeFlag constexpr majorOrientation = SO == columnMajor ? columnVector : rowVector; - T * ptr_; }; @@ -201,6 +214,20 @@ namespace blast struct StorageOrderHelper> : std::integral_constant {}; + /** + * @brief Specialization for StaticMatrixPointer + */ + template + struct IsAligned> : std::integral_constant {}; + + + /** + * @brief Specialization for StaticMatrixPointer + */ + template + struct IsPadded> : std::integral_constant {}; + + template BLAZE_ALWAYS_INLINE auto trans(StaticMatrixPointer const& p) noexcept { diff --git a/include/blast/math/dense/StaticVectorPointer.hpp b/include/blast/math/dense/StaticVectorPointer.hpp index 6f7a0e2..8161485 100644 --- a/include/blast/math/dense/StaticVectorPointer.hpp +++ b/include/blast/math/dense/StaticVectorPointer.hpp @@ -81,12 +81,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return *ptr_; - } - - void store(SimdVecType val) const noexcept { if constexpr (S == 1) @@ -131,6 +125,19 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i element offset + * + * @return reference to the element at specified offset + */ + ElementType& operator[](ptrdiff_t i) const noexcept + { + return *ptrOffset(i); + } + + /** * @brief Get reference to the pointed value. * diff --git a/include/blast/math/expressions/MatTransExpr.hpp b/include/blast/math/expressions/MatTransExpr.hpp index 7224af2..a2d1ccb 100644 --- a/include/blast/math/expressions/MatTransExpr.hpp +++ b/include/blast/math/expressions/MatTransExpr.hpp @@ -168,6 +168,15 @@ namespace blast struct IsAligned> : IsAligned {}; + /** + * @brief Specialization for @a MatTransExpr + * + * @tparam MT expression operand type + */ + template + struct IsPadded> : IsPadded {}; + + /** * @brief Specialization for @a MatTransExpr * diff --git a/include/blast/math/expressions/PMatTransExpr.hpp b/include/blast/math/expressions/PMatTransExpr.hpp index 73b4da6..dcdf29a 100644 --- a/include/blast/math/expressions/PMatTransExpr.hpp +++ b/include/blast/math/expressions/PMatTransExpr.hpp @@ -247,6 +247,14 @@ namespace blast struct IsStatic> : IsStatic {}; + template + struct IsAligned> : IsAligned {}; + + + template + struct IsPadded> : IsPadded {}; + + /** * @brief Specialization for @a PMatTransExpr * diff --git a/include/blast/math/panel/DynamicPanelMatrix.hpp b/include/blast/math/panel/DynamicPanelMatrix.hpp index 82994d7..4656d97 100644 --- a/include/blast/math/panel/DynamicPanelMatrix.hpp +++ b/include/blast/math/panel/DynamicPanelMatrix.hpp @@ -160,6 +160,15 @@ namespace blast } + /** + * @brief Set all matrix elements to 0 + */ + void reset() noexcept + { + std::fill_n(v_, spacing_ * (SO == columnMajor ? n_ : m_), Type {}); + } + + private: static size_t constexpr alignment_ = CACHE_LINE_SIZE; static size_t constexpr SS = SimdSize_v; diff --git a/include/blast/math/panel/DynamicPanelMatrixPointer.hpp b/include/blast/math/panel/DynamicPanelMatrixPointer.hpp index b99fd0e..31ae093 100644 --- a/include/blast/math/panel/DynamicPanelMatrixPointer.hpp +++ b/include/blast/math/panel/DynamicPanelMatrixPointer.hpp @@ -95,12 +95,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return SimdVecType {*ptr_}; - } - - void store(SimdVecType const& val) const noexcept { static_assert(AF, "store() implemented only for aligned pointers"); @@ -135,6 +129,20 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i row offset + * @param j column offset + * + * @return reference to element at specified offset + */ + ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept + { + return *ptrOffset(ptr_, spacing_, i, j); + } + + /** * @brief Get reference to the pointed value. * @@ -244,6 +252,14 @@ namespace blast }; + template + struct IsAligned> : std::integral_constant {}; + + + template + struct IsPadded> : std::integral_constant {}; + + template struct StorageOrderHelper> : std::integral_constant {}; diff --git a/include/blast/math/panel/StaticPanelMatrixPointer.hpp b/include/blast/math/panel/StaticPanelMatrixPointer.hpp index faf8c2e..862982d 100644 --- a/include/blast/math/panel/StaticPanelMatrixPointer.hpp +++ b/include/blast/math/panel/StaticPanelMatrixPointer.hpp @@ -88,12 +88,6 @@ namespace blast } - SimdVecType broadcast() const noexcept - { - return SimdVecType {*ptr_}; - } - - void store(SimdVecType const& val) const noexcept { static_assert(AF, "store() implemented only for aligned pointers"); @@ -122,6 +116,20 @@ namespace blast } + /** + * @brief Access element at specified offset + * + * @param i row offset + * @param j column offset + * + * @return reference to element at specified offset + */ + ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept + { + return *ptrOffset(ptr_, i, j); + } + + /** * @brief Get reference to the pointed value. * @@ -243,6 +251,14 @@ namespace blast struct StorageOrderHelper> : std::integral_constant {}; + template + struct IsAligned> : std::integral_constant {}; + + + template + struct IsPadded> : std::integral_constant {}; + + template BLAST_ALWAYS_INLINE auto trans(StaticPanelMatrixPointer const& p) noexcept { diff --git a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp index caa0eeb..e38ec79 100644 --- a/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp +++ b/include/blast/math/register_matrix/DynamicRegisterMatrix.hpp @@ -407,7 +407,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) if (j < n_) { - SimdVecType bx = b(0, j).broadcast(); + SimdVecType const bx = b[0, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) // TODO: !!! check i against m @@ -430,7 +430,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) if (j < n_) { - SimdVecType bx = b(0, j).broadcast(); + SimdVecType bx = b[0, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) // TODO: !!! check i against m diff --git a/include/blast/math/register_matrix/Gemm.hpp b/include/blast/math/register_matrix/Gemm.hpp index 7454a49..6c1f3b8 100644 --- a/include/blast/math/register_matrix/Gemm.hpp +++ b/include/blast/math/register_matrix/Gemm.hpp @@ -15,6 +15,7 @@ #pragma once #include +#include namespace blast @@ -72,8 +73,18 @@ namespace blast { ker.reset(); + bool constexpr a_columns_aligned = IsAligned_v && IsPadded_v && StorageOrder_v == columnMajor; + bool constexpr b_rows_aligned = IsAligned_v && IsPadded_v && StorageOrder_v == rowMajor; + for (size_t k = 0; k < K; ++k) - ker.ger(column(a(0, k)), row((~b)(k, 0))); + if constexpr (a_columns_aligned && b_rows_aligned) + ker.ger(column(a(0, k)), row(b(k, 0))); + else if constexpr (a_columns_aligned && !b_rows_aligned) + ker.ger(column(a(0, k)), row((~b)(k, 0))); + else if constexpr (!a_columns_aligned && b_rows_aligned) + ker.ger(column((~a)(0, k)), row(b(k, 0))); + else + ker.ger(column((~a)(0, k)), row((~b)(k, 0))); ker *= alpha; ker.axpy(beta, c); @@ -169,4 +180,4 @@ namespace blast ker.store(d, md, nd); } -} \ No newline at end of file +} diff --git a/include/blast/math/register_matrix/RegisterMatrix.hpp b/include/blast/math/register_matrix/RegisterMatrix.hpp index 3285a7a..b21de1c 100644 --- a/include/blast/math/register_matrix/RegisterMatrix.hpp +++ b/include/blast/math/register_matrix/RegisterMatrix.hpp @@ -600,14 +600,14 @@ namespace blast #pragma unroll for (size_t k = 0; k < j; ++k) { - SimdVecType const a_kj = (~A)(k, j).broadcast(); + SimdVecType const a_kj = A[k, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) v_[i][j] = fnmadd(a_kj, v_[i][k], v_[i][j]); } - IntrinsicType const a_jj = (~A)(j, j).broadcast(); + SimdVecType const a_jj = A[j, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -645,7 +645,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) { - SimdVecType const bx {*(~b)(j)}; + SimdVecType const bx = b[j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -672,7 +672,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) { - SimdVecType bx = (~b)(j).broadcast(); + SimdVecType const bx = b[j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -697,7 +697,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) if (j < n) { - SimdVecType bx = (~b)(j).broadcast(); + SimdVecType const bx = b[j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -722,7 +722,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) if (j < n) { - SimdVecType bx = (~b)(j).broadcast(); + SimdVecType const bx = b[j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -793,7 +793,7 @@ namespace blast #pragma unroll for (size_t j = 0; j < N; ++j) { - SimdVecType bx = bu(0, j).broadcast(); + SimdVecType const bx = bu[0, j]; #pragma unroll for (size_t i = 0; i < ii; ++i) @@ -840,7 +840,7 @@ namespace blast #pragma unroll for (size_t j = 0; j <= k; ++j) { - SimdVecType ax = au(0, j).broadcast(); + SimdVecType const ax = au[0, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) @@ -887,7 +887,7 @@ namespace blast #pragma unroll for (size_t j = 0; j <= k; ++j) { - SimdVecType ax = au(0, j).broadcast(); + SimdVecType const ax = au[0, j]; #pragma unroll for (size_t i = 0; i < RM; ++i) diff --git a/include/blast/math/typetraits/IsAligned.hpp b/include/blast/math/typetraits/IsAligned.hpp index 5b3e7d8..93e04d9 100644 --- a/include/blast/math/typetraits/IsAligned.hpp +++ b/include/blast/math/typetraits/IsAligned.hpp @@ -1,28 +1,35 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once -#include - namespace blast { + /** + * @brief Tests if the given matrix or vector type storage is aligned. + * + * @tparam T matrix or vector type + */ + template + struct IsAligned; + + + /** + * @brief Specialization for const types + * + * @tparam T matrix or vector type + */ template - struct IsAligned : blaze::IsAligned {}; + struct IsAligned : IsAligned {}; + /** + * @brief Shortcut for @a IsAligned::value + * + * @tparam T matrix or vector type + */ template bool constexpr IsAligned_v = IsAligned::value; -} \ No newline at end of file +} diff --git a/include/blast/math/typetraits/IsPadded.hpp b/include/blast/math/typetraits/IsPadded.hpp index 865f6dd..66fb5a7 100644 --- a/include/blast/math/typetraits/IsPadded.hpp +++ b/include/blast/math/typetraits/IsPadded.hpp @@ -1,23 +1,35 @@ -// Copyright 2023 Mikhail Katliar -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Copyright 2024 Mikhail Katliar. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. #pragma once -#include - namespace blast { - using blaze::IsPadded_v; -} \ No newline at end of file + /** + * @brief Tests if the given matrix or vector type storage is padded. + * + * @tparam T matrix or vector type + */ + template + struct IsPadded; + + + /** + * @brief Specialization for const types + * + * @tparam T matrix or vector type + */ + template + struct IsPadded : IsPadded {}; + + + /** + * @brief Shortcut for @a IsPadded::value + * + * @tparam T matrix or vector type + */ + template + bool constexpr IsPadded_v = IsPadded::value; +} diff --git a/include/blast/math/typetraits/MatrixPointer.hpp b/include/blast/math/typetraits/MatrixPointer.hpp index 5080074..9b1b42a 100644 --- a/include/blast/math/typetraits/MatrixPointer.hpp +++ b/include/blast/math/typetraits/MatrixPointer.hpp @@ -15,8 +15,8 @@ namespace blast concept MatrixPointer = requires(P p, std::ptrdiff_t i, std::ptrdiff_t j) { p(i, j); + p[i, j]; p.load(); - p.broadcast(); p.vmove(i); p.hmove(j); p.spacing(); @@ -28,4 +28,4 @@ namespace blast // {p.get()} -> std::same_as; }; -} \ No newline at end of file +} diff --git a/include/blast/math/typetraits/VectorPointer.hpp b/include/blast/math/typetraits/VectorPointer.hpp index b678381..3b00aa1 100644 --- a/include/blast/math/typetraits/VectorPointer.hpp +++ b/include/blast/math/typetraits/VectorPointer.hpp @@ -15,8 +15,8 @@ namespace blast concept VectorPointer = requires(P p, std::ptrdiff_t i) { p(i); + p[i]; p.load(); - p.broadcast(); p.spacing(); p.trans(); ~p; diff --git a/include/blast/math/views/Submatrix.hpp b/include/blast/math/views/Submatrix.hpp index a249ff5..3cef794 100644 --- a/include/blast/math/views/Submatrix.hpp +++ b/include/blast/math/views/Submatrix.hpp @@ -173,6 +173,15 @@ namespace blast struct IsAligned> : std::integral_constant {}; + /** + * @brief Specialization for @a Submatrix class + * + * Submatrices can't be padded + */ + template + struct IsPadded> : std::integral_constant {}; + + /** * @brief Specialization for @a Submatrix class */ diff --git a/test/blast/math/panel/GemmTest.cpp b/test/blast/math/panel/GemmTest.cpp index 7d9be8d..5af8f86 100644 --- a/test/blast/math/panel/GemmTest.cpp +++ b/test/blast/math/panel/GemmTest.cpp @@ -8,11 +8,10 @@ #include #include #include - -#include +#include +#include #include -#include #include @@ -39,31 +38,22 @@ namespace blast :: testing { for (size_t K = 1; K <= K_max; ++K) { - // Init Blaze matrices - // - blaze::DynamicMatrix blaze_A(M, K), blaze_B(N, K), blaze_C(M, N), blaze_D(M, N); - randomize(blaze_A); - randomize(blaze_B); - randomize(blaze_C); - - // Init BLAST matrices - // DynamicPanelMatrix A(M, K); DynamicPanelMatrix B(N, K); DynamicPanelMatrix C(M, N); DynamicPanelMatrix D(M, N); - A = blaze_A; - B = blaze_B; - C = blaze_C; + randomize(A); + randomize(B); + randomize(C); // Do gemm with BLAST gemm(A, trans(B), C, D); - // Copy the resulting D matrix from BLAST to Blaze - blaze_D = D; + DynamicPanelMatrix D_ref(M, N); + reference::gemm(1., A, trans(B), 1., C, D_ref); - BLAST_ASSERT_APPROX_EQ(blaze_D, evaluate(blaze_C + blaze_A * trans(blaze_B)), absTol(), relTol()) + BLAST_ASSERT_APPROX_EQ(D, D_ref, absTol(), relTol()) << "gemm error at size m,n,k=" << M << "," << N << "," << K; } } diff --git a/test/blast/math/panel/PotrfTest.cpp b/test/blast/math/panel/PotrfTest.cpp index 0c01118..21a2654 100644 --- a/test/blast/math/panel/PotrfTest.cpp +++ b/test/blast/math/panel/PotrfTest.cpp @@ -3,14 +3,16 @@ // license that can be found in the LICENSE file. #include +#include #include #include -#include +#include +#include #include -#include #include + namespace blast :: testing { template @@ -29,13 +31,8 @@ namespace blast :: testing for (size_t M = 0; M <= 50; ++M) { - // Init matrices - // - blaze::DynamicMatrix blaze_A(M, M); - makePositiveDefinite(blaze_A); - DynamicPanelMatrix A(M, M), L(M, M), A1(M, M); - A = blaze_A; + makePositiveDefinite(A); // Do potrf potrf(A, L); @@ -46,8 +43,8 @@ namespace blast :: testing EXPECT_NEAR(L(i, j), Real {}, absTol()); // Check A == L * trans(L) - A1 = 0.; - gemm(L, trans(L), A1, A1); + reset(A1); + reference::gemm(1., L, trans(L), 0., A1, A1); BLAST_EXPECT_APPROX_EQ(A1, A, absTol(), relTol()) << "potrf error for size " << M; } diff --git a/test/blast/math/simd/RegisterMatrixTest.cpp b/test/blast/math/simd/RegisterMatrixTest.cpp index dcacdab..9937699 100644 --- a/test/blast/math/simd/RegisterMatrixTest.cpp +++ b/test/blast/math/simd/RegisterMatrixTest.cpp @@ -354,7 +354,7 @@ namespace blast :: testing reference::ger(rows(C), columns(C), 1., column(ptr(A)), column(ptr(B)).trans(), ptr(C), ptr(C)); - BLAST_EXPECT_EQ(ker, C); + BLAST_EXPECT_APPROX_EQ(ker, C, absTol(), relTol()); } @@ -411,7 +411,8 @@ namespace blast :: testing for (size_t i = 0; i < m; ++i) for (size_t j = 0; j < n; ++j) - ASSERT_EQ(ker(i, j), i < m && j < n ? D(i, j) : 0.) << "element mismatch at (" << i << ", " << j << "), " + BLAST_ASSERT_APPROX_EQ(ker(i, j), D(i, j), absTol(), relTol()) + << "element mismatch at (" << i << ", " << j << "), " << "store size = " << m << "x" << n; } } @@ -474,7 +475,7 @@ namespace blast :: testing StaticMatrix D_ref; reference::ger(Traits::rows, Traits::columns, 1., ptr(a), ptr(trans(b)), ptr(C), ptr(D_ref)); - BLAST_EXPECT_EQ(D, D_ref); + BLAST_EXPECT_APPROX_EQ(D, D_ref, absTol(), relTol()); } @@ -650,7 +651,7 @@ namespace blast :: testing StaticMatrix C; reference::axpy(alpha, A, B, C); - EXPECT_EQ(ker, C); + BLAST_EXPECT_APPROX_EQ(ker, C, absTol(), relTol()); } @@ -684,7 +685,7 @@ namespace blast :: testing for (size_t i = 0; i < m; ++i) for (size_t j = 0; j < n; ++j) - ASSERT_EQ(ker(i, j), C(i, j)) + BLAST_ASSERT_APPROX_EQ(ker(i, j), C(i, j), absTol(), relTol()) << "element mismatch at (" << i << ", " << j << "), " << "axpy() size = " << m << "x" << n; }