Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix gemm() performance for panel matrices #43

Merged
merged 12 commits into from
Oct 15, 2024
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 3.10) # Need at least 3.10 for gtest_discover_t
project(blast VERSION 0.1 LANGUAGES CXX)

# Enable modern C++
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 23)

# Allow for integration with other tools such as Intellisense
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
Expand Down
1 change: 1 addition & 0 deletions bench/blast/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_executable(bench-blast
math/panel/DynamicGemm.cpp
math/panel/StaticPotrf.cpp
math/panel/DynamicPotrf.cpp
math/panel/StaticMatrixPointer.cpp
)

target_compile_definitions(bench-blast
Expand Down
34 changes: 34 additions & 0 deletions bench/blast/math/panel/StaticMatrixPointer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include <blast/math/StaticPanelMatrix.hpp>

#include <bench/Benchmark.hpp>


namespace blast :: benchmark
{
    /**
     * @brief Benchmark applying a static panel matrix pointer to every (row, column) offset.
     *
     * A pointer to element (0, 0) of an 8x4 column-major @a StaticPanelMatrix is created once,
     * before the timed loop. Every benchmark iteration then invokes the pointer's call operator
     * for each (row, column) pair, rows in the outer loop, and hands the result to
     * @a DoNotOptimize so the call is not eliminated.
     *
     * @tparam Real element type of the matrix
     * @tparam AF alignment flag forwarded to @a ptr
     *
     * @param state benchmark state driving the timed loop
     */
    template <typename Real, AlignmentFlag AF>
    static void BM_static_panel_matrix_pointer(State& state)
    {
        constexpr size_t rows = 8;
        constexpr size_t columns = 4;

        StaticPanelMatrix<Real, rows, columns, columnMajor> matrix;
        auto origin = ptr<AF>(matrix, 0, 0);

        for (auto _ : state)
        {
            for (size_t row = 0; row < rows; ++row)
            {
                for (size_t column = 0; column < columns; ++column)
                {
                    DoNotOptimize(origin(row, column));
                }
            }
        }
    }


    // Register the benchmark for both element types and both alignment flags.
    BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, aligned);
    BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, double, unaligned);
    BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, aligned);
    BENCHMARK_TEMPLATE(BM_static_panel_matrix_pointer, float, unaligned);
}
2 changes: 1 addition & 1 deletion bench/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ target_link_libraries(bench-blast-common
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# More aggressive inlining with Clang
target_compile_options(bench-blast-common
PUBLIC "-mllvm" "-inline-threshold=1000"
PUBLIC "-mllvm" "-inline-threshold=4000"
)
endif()
2 changes: 2 additions & 0 deletions include/blast/blaze/math/TypeTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@
#include <blast/blaze/math/typetraits/IsStaticallySpaced.hpp>
#include <blast/blaze/math/typetraits/IsDenseVector.hpp>
#include <blast/blaze/math/typetraits/IsDenseMatrix.hpp>
#include <blast/blaze/math/typetraits/IsAligned.hpp>
#include <blast/blaze/math/typetraits/IsPadded.hpp>
#include <blast/blaze/math/typetraits/Spacing.hpp>
#include <blast/blaze/math/typetraits/StorageOrder.hpp>
24 changes: 24 additions & 0 deletions include/blast/blaze/math/typetraits/IsAligned.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/math/typetraits/IsAligned.hpp>

#include <blaze/math/typetraits/IsVector.h>
#include <blaze/math/typetraits/IsMatrix.h>
#include <blaze/math/typetraits/IsAligned.h>


namespace blast
{
/**
* @brief Specialization of @a IsAligned for Blaze matrix and vector types
*
* Selected for every type @a T for which @a blaze::IsVector_v<T> or
* @a blaze::IsMatrix_v<T> holds. The trait value is inherited from
* @a blaze::IsAligned<T>, i.e. the answer is delegated to the Blaze trait.
*
* @tparam T matrix or vector type
*/
template <typename T>
requires blaze::IsVector_v<T> || blaze::IsMatrix_v<T>
struct IsAligned<T> : blaze::IsAligned<T> {};
}
24 changes: 24 additions & 0 deletions include/blast/blaze/math/typetraits/IsPadded.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright 2024 Mikhail Katliar. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

#include <blast/math/typetraits/IsPadded.hpp>

#include <blaze/math/typetraits/IsVector.h>
#include <blaze/math/typetraits/IsMatrix.h>
#include <blaze/math/typetraits/IsPadded.h>


namespace blast
{
/**
* @brief Specialization of @a IsPadded for Blaze matrix and vector types
*
* Selected for every type @a T for which @a blaze::IsVector_v<T> or
* @a blaze::IsMatrix_v<T> holds. The trait value is inherited from
* @a blaze::IsPadded<T>, i.e. the answer is delegated to the Blaze trait.
*
* @tparam T matrix or vector type
*/
template <typename T>
requires blaze::IsVector_v<T> || blaze::IsMatrix_v<T>
struct IsPadded<T> : blaze::IsPadded<T> {};
}
19 changes: 7 additions & 12 deletions include/blast/math/DynamicPanelMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,8 +329,6 @@ namespace blaze
, bool SO >
void makePositiveDefinite( DynamicPanelMatrix<Type, SO>& matrix )
{
using blaze::randomize;

BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE( Type );

if( !isSquare( *matrix ) ) {
Expand All @@ -349,16 +347,13 @@ namespace blaze

gemm(A, trans(A), matrix, matrix);

// TODO: implement it as below after the matrix *= ctrans( matrix ) expression works.

// randomize( matrix );
// matrix *= ctrans( matrix );

// for( size_t i=0UL; i<n; ++i ) {
// matrix(i,i) += Type(n);
// }

BLAZE_INTERNAL_ASSERT( isHermitian( matrix ), "Non-symmetric matrix detected" );
// NOTE: if uncommented, the following line results in a compiler error:
// /usr/local/include/blaze/math/Matrix.h:203:13: note: candidate template ignored: invalid explicitly-specified argument for template parameter 'MT'
// 203 | inline bool isHermitian( const Matrix<MT,SO>& m )
// I could not figure out what causes it, but we are going to decouple DynamicPanelMatrix from Blaze,
// so this code will be gone anyway.
//
// BLAZE_INTERNAL_ASSERT( isHermitian( matrix ), "Non-symmetric matrix detected" );
}
/*! \endcond */
//*************************************************************************************************
Expand Down
32 changes: 9 additions & 23 deletions include/blast/math/RowColumnVectorPointer.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
// Copyright 2023 Mikhail Katliar
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Copyright (c) 2019-2020 Mikhail Katliar All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#pragma once

Expand Down Expand Up @@ -78,12 +68,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return ptr_.broadcast();
}


void store(SimdVecType const& val) const noexcept
{
ptr_.store(transposeFlag, val);
Expand Down Expand Up @@ -123,13 +107,15 @@ namespace blast


/**
* @brief Get reference to the pointed value.
* @brief Access element at specified offset
*
* @return reference to the pointed value
* @param i offset
*
* @return reference to the element at specified offset
*/
ElementType& operator*() noexcept
ElementType& operator[](ptrdiff_t i) const noexcept
{
return *ptr_;
return transposeFlag == columnVector ? ptr_[i, 0] : ptr_[0, i];
}


Expand Down
4 changes: 2 additions & 2 deletions include/blast/math/algorithm/Gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ namespace blast
M, N,
[&] (auto& ker, size_t i, size_t j)
{
gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j));
gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j));
},
[&] (auto& ker, size_t i, size_t j, size_t m, size_t n)
{
gemm(ker, K, alpha, A(i, 0), B(0, j), beta, C(i, j), D(i, j), m, n);
gemm(ker, K, alpha, A(i, 0), (~B)(0, j), beta, C(i, j), D(i, j), m, n);
}
);
}
Expand Down
5 changes: 2 additions & 3 deletions include/blast/math/algorithm/Tile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
#endif

#include <blast/math/StorageOrder.hpp>

#include <cstdlib>
#include <blast/util/Types.hpp>


namespace blast
Expand Down Expand Up @@ -47,7 +46,7 @@ namespace blast
* @param f_partial functor to call on partial tiles
*/
template <typename ET, StorageOrder SO, typename FF, typename FP, typename Arch>
inline void tile(Arch arch, StorageOrder traversal_order, std::size_t m, std::size_t n, FF&& f_full, FP&& f_partial)
inline void tile(Arch arch, StorageOrder traversal_order, size_t m, size_t n, FF&& f_full, FP&& f_partial)
{
detail::tile<ET, SO>(arch, traversal_order, m, n, f_full, f_partial);
}
Expand Down
2 changes: 0 additions & 2 deletions include/blast/math/algorithm/arch/avx2/Tile.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

#include <blast/math/Simd.hpp>

#include <cstdlib>


namespace blast :: detail
{
Expand Down
8 changes: 8 additions & 0 deletions include/blast/math/dense/DynamicMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,12 @@ namespace blast

template <typename T, bool SO>
struct StorageOrderHelper<DynamicMatrix<T, SO>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Specialization of @a IsAligned for @a DynamicMatrix; the trait value is always true.
 */
template <typename Element, bool Order>
struct IsAligned<DynamicMatrix<Element, Order>> : std::true_type {};


/**
 * @brief Specialization of @a IsPadded for @a DynamicMatrix; the trait value is always true.
 */
template <typename Element, bool Order>
struct IsPadded<DynamicMatrix<Element, Order>> : std::true_type {};
}
34 changes: 28 additions & 6 deletions include/blast/math/dense/DynamicMatrixPointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return *ptr_;
}


void store(SimdVecType const& val) const noexcept
{
val.store(ptr_, AF);
Expand Down Expand Up @@ -119,6 +113,20 @@ namespace blast
}


/**
* @brief Access element at specified offset
*
* Dereferences the pointer obtained from @a ptrOffset(i, j). The offsets are signed
* (@a ptrdiff_t). The function is const-qualified but returns a non-const reference:
* constness applies to the pointer object, not to the element it refers to.
*
* NOTE: the two-argument subscript operator is a C++23 language feature.
*
* @param i row offset
* @param j column offset
*
* @return reference to the element at specified offset
*/
ElementType& operator[](ptrdiff_t i, ptrdiff_t j) const noexcept
{
return *ptrOffset(i, j);
}


/**
* @brief Get reference to the pointed value.
*
Expand Down Expand Up @@ -210,6 +218,20 @@ namespace blast
struct StorageOrderHelper<DynamicMatrixPointer<T, SO, AF, PF>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Specialization of @a IsAligned for @a DynamicMatrixPointer
 *
 * The trait value equals the pointer's @a AF template argument.
 */
template <typename T, bool SO, bool AF, bool PF>
struct IsAligned<DynamicMatrixPointer<T, SO, AF, PF>> : std::bool_constant<AF> {};


/**
 * @brief Specialization of @a IsPadded for @a DynamicMatrixPointer
 *
 * The trait value equals the pointer's @a PF template argument.
 */
template <typename T, bool SO, bool AF, bool PF>
struct IsPadded<DynamicMatrixPointer<T, SO, AF, PF>> : std::bool_constant<PF> {};


template <typename T, bool SO, bool AF, bool PF>
BLAST_ALWAYS_INLINE auto trans(DynamicMatrixPointer<T, SO, AF, PF> const& p) noexcept
{
Expand Down
19 changes: 13 additions & 6 deletions include/blast/math/dense/DynamicVectorPointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ namespace blast
}


SimdVecType broadcast() const noexcept
{
return *ptr_;
}


void store(IntrinsicType val) const noexcept
{
// Non-optimized
Expand Down Expand Up @@ -120,6 +114,19 @@ namespace blast
}


/**
* @brief Access element at specified offset
*
* Dereferences the pointer obtained from @a ptrOffset(i). The offset is signed
* (@a ptrdiff_t). The function is const-qualified but returns a non-const reference:
* constness applies to the pointer object, not to the element it refers to.
*
* @param i offset
*
* @return reference to the element at specified offset
*/
ElementType& operator[](ptrdiff_t i) const noexcept
{
return *ptrOffset(i);
}


/**
* @brief Get reference to the pointed value.
*
Expand Down
9 changes: 9 additions & 0 deletions include/blast/math/dense/StaticMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <blast/util/Types.hpp>

#include <initializer_list>
#include <type_traits>


namespace blast
Expand Down Expand Up @@ -214,4 +215,12 @@ namespace blast

template <typename T, size_t M, size_t N, bool SO>
struct StorageOrderHelper<StaticMatrix<T, M, N, SO>> : std::integral_constant<StorageOrder, StorageOrder(SO)> {};


/**
 * @brief Specialization of @a IsAligned for @a StaticMatrix; the trait value is always true.
 */
template <typename Element, size_t Rows, size_t Columns, bool Order>
struct IsAligned<StaticMatrix<Element, Rows, Columns, Order>> : std::true_type {};


/**
 * @brief Specialization of @a IsPadded for @a StaticMatrix; the trait value is always true.
 */
template <typename Element, size_t Rows, size_t Columns, bool Order>
struct IsPadded<StaticMatrix<Element, Rows, Columns, Order>> : std::true_type {};
}
Loading
Loading