diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 92e4f4264a..4f1096001a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -337,11 +337,22 @@ set( MIOpen_Source solver/softmarginloss/forward_softmarginloss.cpp solver/softmax/attn_softmax.cpp solver/softmax/softmax.cpp + solver/tensorOp/Op1dTensorGeneric.cpp + solver/tensorOp/Op2dTensorGeneric.cpp + solver/tensorOp/Op2dTensorLite.cpp + solver/tensorOp/Op2dTensorSquash.cpp + solver/tensorOp/Op3dTensorGeneric.cpp + solver/tensorOp/OpTensorFwdBias.cpp + solver/tensorOp/Op4dTensorLite.cpp + solver/tensorOp/OpTensorLeadingOnes.cpp + solver/tensorOp/Op4dTensorGeneric.cpp + solver/tensorOp/Op5dTensorGeneric.cpp subbuffers.cpp t5layernorm_api.cpp target_properties.cpp temp_file.cpp tensor.cpp + tensorOp/problem_description.cpp tensor_api.cpp transformers_adam_w_api.cpp seq_tensor.cpp diff --git a/src/include/miopen/names.hpp b/src/include/miopen/names.hpp index 17b96b8732..bdf59c361c 100644 --- a/src/include/miopen/names.hpp +++ b/src/include/miopen/names.hpp @@ -34,6 +34,7 @@ struct NetworkConfig { NetworkConfig() = default; explicit NetworkConfig(const std::string& value_) : value(value_) {} + explicit NetworkConfig(std::string&& value_) noexcept : value(std::move(value_)) {} operator std::string() const { return value; } const std::string& ToString() const { return value; } diff --git a/src/include/miopen/tensorOp/invoke_params.hpp b/src/include/miopen/tensorOp/invoke_params.hpp new file mode 100644 index 0000000000..6b8f2ca88c --- /dev/null +++ b/src/include/miopen/tensorOp/invoke_params.hpp @@ -0,0 +1,78 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/invoke_params.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+
+namespace tensorOp {
+
+struct InvokeParams : public miopen::InvokeParams
+{
+    InvokeParams(const void* alpha0_,
+                 ConstData_t ATensor_,
+                 const void* alpha1_,
+                 ConstData_t BTensor_,
+                 const void* beta_,
+                 Data_t CTensor_,
+                 const size_t Aoffset_,
+                 const size_t Boffset_,
+                 const size_t Coffset_)
+        : alpha0(alpha0_),
+          alpha1(alpha1_),
+          beta(beta_),
+          ATensor(ATensor_),
+          BTensor(BTensor_),
+          CTensor(CTensor_),
+          Aoffset(Aoffset_),
+          Boffset(Boffset_),
+          Coffset(Coffset_)
+    {
+    }
+
+    size_t GetWorkspaceSize() const { return 0; }
+    Data_t GetWorkspace() const { return nullptr; }
+
+public:
+    const void* alpha0;
+    const void* alpha1;
+    const void* beta;
+
+    ConstData_t ATensor;
+    ConstData_t BTensor;
+    Data_t CTensor;
+
+    size_t Aoffset;
+    size_t Boffset;
+    size_t Coffset;
+};
+
+} // namespace tensorOp
+
+} // namespace miopen
diff --git a/src/include/miopen/tensorOp/problem_description.hpp b/src/include/miopen/tensorOp/problem_description.hpp
new file mode 100644
index 0000000000..dc60a3c7c9
--- /dev/null
+++ b/src/include/miopen/tensorOp/problem_description.hpp
@@ -0,0 +1,130 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/problem_description_base.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+
+struct NetworkConfig;
+
+namespace tensorOp {
+
+struct ProblemDescription : ProblemDescriptionBase
+{
+    ProblemDescription(const miopenTensorOp_t tensorOp_,
+                       const void* beta_,
+                       const TensorDescriptor& aTensorDesc_,
+                       const TensorDescriptor& bTensorDesc_,
+                       const TensorDescriptor& cTensorDesc_,
+                       const bool nonStandardSquash_)
+        : tensorOp(tensorOp_),
+          aTensorDesc(aTensorDesc_),
+          bTensorDesc(bTensorDesc_),
+          cTensorDesc(cTensorDesc_),
+          nonStandardSquash(nonStandardSquash_)
+    {
+        if(beta_ == nullptr)
+        {
+            MIOPEN_THROW(miopenStatusBadParm, "Beta value is nullptr");
+        }
+
+        beta = *(static_cast<const float*>(beta_));
+
+        if(aTensorDesc.GetElementSize() != cTensorDesc.GetElementSize())
+        {
+            MIOPEN_THROW("A and C Tensors do not match");
+        }
+
+        if(bTensorDesc.GetType() != cTensorDesc.GetType())
+        {
+            MIOPEN_THROW("Datatypes for B and C tensors do not match!");
+        }
+
+        const auto& blens = bTensorDesc.GetLengths();
+        const auto& clens = cTensorDesc.GetLengths();
+
+        if(clens.size() > 5)
+        {
+            MIOPEN_THROW("Tensor dimension larger than 5: " + std::to_string(clens.size()));
+        }
+
+        if(blens.size() != clens.size())
+        {
+            MIOPEN_THROW("Number of dims in B and C Tensors do not match: " +
+                         std::to_string(blens.size()) + ", " + std::to_string(clens.size()));
+        }
+
+        if(!nonStandardSquash)
+        {
+            constexpr auto comparator = [](size_t c, size_t b) { return b == 1 || b == c; };
+            const auto [c_diff, b_diff] =
+                std::mismatch(clens.begin(), clens.end(), blens.begin(), comparator);
+            if(c_diff != clens.end())
+                MIOPEN_THROW("BTensor dim != 1 && BTensor dim != CTensor dim:" +
+                             std::to_string(std::distance(clens.begin(), c_diff)));
+        }
+        else
+        {
+            // non-standard behavior because blens[1] may not be equal to clens[1]
+            if(!(clens.size() == 3 && blens[0] == 1 && clens[0] == 1 && blens[2] == clens[2]))
+            {
+                MIOPEN_THROW(
+                    "Non-standard squashed operation is supported only for 3D tensors and for "
+                    "the specific configuration");
+            }
+        }
+    }
+
+    miopenTensorOp_t GetTensorOp() const { return tensorOp; }
+
+    float GetBeta() const { return beta; }
+
+    const TensorDescriptor& GetATensorDesc() const { return aTensorDesc; }
+    const TensorDescriptor& GetBTensorDesc() const { return bTensorDesc; }
+    const TensorDescriptor& GetCTensorDesc() const { return cTensorDesc; }
+
+    bool GetNonStandardSquash() const { return nonStandardSquash; }
+
+    NetworkConfig MakeNetworkConfig() const override;
+
+private:
+    const miopenTensorOp_t tensorOp;
+
+    float beta;
+
+    TensorDescriptor aTensorDesc;
+    TensorDescriptor bTensorDesc;
+    TensorDescriptor cTensorDesc;
+
+    const bool nonStandardSquash;
+};
+
+} // namespace tensorOp
+
+} // namespace miopen
diff --git a/src/include/miopen/tensorOp/solvers.hpp b/src/include/miopen/tensorOp/solvers.hpp
new file mode 100644
index 0000000000..635d0ab777
--- /dev/null
+++ b/src/include/miopen/tensorOp/solvers.hpp
@@ -0,0 +1,216 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/solver.hpp>
+#include <miopen/tensorOp/problem_description.hpp>
+
+#include <utility>
+
+namespace miopen {
+
+namespace solver {
+
+namespace tensorOp {
+
+using TensorOpSolver =
+    NonTunableSolverBase<ExecutionContext, miopen::tensorOp::ProblemDescription>;
+
+struct Op1dTensorGeneric final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op1dTensorGeneric>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op2dTensorGeneric final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op2dTensorGeneric>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op2dTensorLite final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op2dTensorLite>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op2dTensorSquash final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op2dTensorSquash>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op3dTensorGeneric final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op3dTensorGeneric>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct OpTensorFwdBias final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<OpTensorFwdBias>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op4dTensorLite final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op4dTensorLite>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct OpTensorLeadingOnes final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<OpTensorLeadingOnes>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op4dTensorGeneric final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op4dTensorGeneric>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
+    GetWorkspaceSize(const ExecutionContext& context,
+                     const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    bool MayNeedWorkspace() const override { return false; }
+};
+
+struct Op5dTensorGeneric final : TensorOpSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<Op5dTensorGeneric>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    ConvSolution GetSolution(const ExecutionContext& context,
+                             const miopen::tensorOp::ProblemDescription& problem) const override;
+
+    std::size_t
GetWorkspaceSize(const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const override; + + bool MayNeedWorkspace() const override { return false; } +}; + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/include/miopen/tensor_ops.hpp b/src/include/miopen/tensor_ops.hpp index 25d838598b..c19eb333f2 100644 --- a/src/include/miopen/tensor_ops.hpp +++ b/src/include/miopen/tensor_ops.hpp @@ -189,6 +189,22 @@ MIOPEN_INTERNALS_EXPORT void OpTensor(const Handle& handle, size_t Coffset = 0, bool nonStandardSquash = false); +MIOPEN_INTERNALS_EXPORT void OpTensor2(Handle& handle, + miopenTensorOp_t tensorOp, + const void* alpha0, + const TensorDescriptor& aTensorDesc, + ConstData_t ATensor, + const void* alpha1, + const TensorDescriptor& bTensorDesc, + ConstData_t BTensor, + const void* beta, + const TensorDescriptor& cTensorDesc, + Data_t CTensor, + size_t Aoffset = 0, + size_t Boffset = 0, + size_t Coffset = 0, + bool nonStandardSquash = false); + MIOPEN_INTERNALS_EXPORT void CopyTensor(const Handle& handle, const TensorDescriptor& srcDesc, ConstData_t src, diff --git a/src/kernels/MIOpenTensorKernels.cl b/src/kernels/MIOpenTensorKernels.cl index cc47d8e6ce..842d3d4d6b 100644 --- a/src/kernels/MIOpenTensorKernels.cl +++ b/src/kernels/MIOpenTensorKernels.cl @@ -24,24 +24,6 @@ * *******************************************************************************/ -#if MIOPEN_USE_FP16 == 1 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#define _FLOAT half -#ifndef HALF_MAX -#define MAX_VAL 65504 /* max value */ -#else -#define MAX_VAL HALF_MAX -#endif -#endif -#if MIOPEN_USE_FP32 == 1 -#define _FLOAT float -#ifndef FLT_MAX -#define MAX_VAL 3.402823466e+38F /* max value */ -#else -#define MAX_VAL FLT_MAX -#endif -#endif - /* Only works for NCHW * bitmap tracks which dims are the same between 'a' and 'c'. * Example: 0, 1, 1, 0 means that C and H dims are the same and the rest are ones diff --git a/src/solver/tensorOp/Op1dTensorGeneric.cpp b/src/solver/tensorOp/Op1dTensorGeneric.cpp new file mode 100644 index 0000000000..896d75d50c --- /dev/null +++ b/src/solver/tensorOp/Op1dTensorGeneric.cpp @@ -0,0 +1,173 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op1dTensorGeneric::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& alens = aTensorDesc.GetLengths(); + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 1) + { + return true; + } + + return false; +} + +std::size_t Op1dTensorGeneric::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +Op1dTensorGeneric::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const size_t b_n = bTensorDesc.GetLengths()[0]; + const size_t c_n = cTensorDesc.GetLengths()[0]; + + const size_t a_nstrides = aTensorDesc.GetStrides()[0]; + const size_t b_nstrides = bTensorDesc.GetStrides()[0]; + const size_t c_nstrides = cTensorDesc.GetStrides()[0]; + + miopenDataType_t data_type = bTensorDesc.GetType(); + bool fit_into_int = aTensorDesc.AllDimsFitIntoInt(); + + size_t local_threads = 256; + size_t max_num_wg = 4096; + + auto num_wg = std::clamp(c_n / local_threads, size_t(1), size_t(max_num_wg)); + num_wg = num_wg > max_num_wg ? max_num_wg : num_wg; + size_t global_threads = num_wg * local_threads; + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, true); + + build_params.Define("USE_1D_TENSOR_GENERIC"); + + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + kernel.kernel_file = "MIOpenTensorKernelsHip.cpp"; + kernel.kernel_name = "Op1dTensorGeneric"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = + [data_type, fit_into_int, b_n, c_n, a_nstrides, b_nstrides, c_nstrides]( + const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + if(fit_into_int) + { + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(a_nstrides), + static_cast(b_n == 1 ? 
0 : b_nstrides), + static_cast(c_nstrides), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(c_n), + !float_equal(miopen_beta, 0.0)); + } + else + { + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(a_nstrides), + static_cast(b_n == 1 ? 0 : b_nstrides), + static_cast(c_nstrides), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(c_n), + !float_equal(miopen_beta, 0.0)); + } + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op2dTensorGeneric.cpp b/src/solver/tensorOp/Op2dTensorGeneric.cpp new file mode 100644 index 0000000000..41fca78068 --- /dev/null +++ b/src/solver/tensorOp/Op2dTensorGeneric.cpp @@ -0,0 +1,186 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op2dTensorGeneric::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& alens = aTensorDesc.GetLengths(); + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 2) + { + return true; + } + + return false; +} + +std::size_t Op2dTensorGeneric::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +Op2dTensorGeneric::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + std::array blens; + std::array clens; + std::tie(blens[0], blens[1]) = miopen::tien<2>(bTensorDesc.GetLengths()); + std::tie(clens[0], clens[1]) = miopen::tien<2>(cTensorDesc.GetLengths()); + + std::array astrides; + std::array bstrides; + std::array cstrides; + std::tie(astrides[0], astrides[1]) = miopen::tien<2>(aTensorDesc.GetStrides()); + std::tie(bstrides[0], bstrides[1]) = miopen::tien<2>(bTensorDesc.GetStrides()); + std::tie(cstrides[0], cstrides[1]) = miopen::tien<2>(cTensorDesc.GetStrides()); + + miopenDataType_t data_type = bTensorDesc.GetType(); + bool fit_into_int = aTensorDesc.AllDimsFitIntoInt(); + + size_t local_threads = 32; + size_t max_num_wg = 4096; + + auto num_wg = std::clamp((clens[0] * clens[1]) / local_threads, size_t(1), size_t(max_num_wg)); + num_wg = num_wg > max_num_wg ? max_num_wg : num_wg; + size_t global_threads = num_wg * local_threads; + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, true); + + build_params.Define("USE_2D_TENSOR_GENERIC"); + + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + kernel.kernel_file = "MIOpenTensorKernelsHip.cpp"; + kernel.kernel_name = "Op2dTensorGeneric"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = [data_type, fit_into_int, blens, clens, astrides, bstrides, cstrides]( + const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + if(fit_into_int) + { + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(blens[1] == 1 ? 
clens[1] : blens[1]), + static_cast(clens[1]), + static_cast(astrides[0]), + static_cast(astrides[1]), + static_cast(blens[0] == 1 ? 0 : bstrides[0]), + static_cast(blens[1] == 1 ? 0 : bstrides[1]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(clens[0]), + !float_equal(miopen_beta, 0.0)); + } + else + { + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(blens[1] == 1 ? clens[1] : blens[1]), + static_cast(clens[1]), + static_cast(astrides[0]), + static_cast(astrides[1]), + static_cast(blens[0] == 1 ? 0 : bstrides[0]), + static_cast(blens[1] == 1 ? 0 : bstrides[1]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(clens[0]), + !float_equal(miopen_beta, 0.0)); + } + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op2dTensorLite.cpp b/src/solver/tensorOp/Op2dTensorLite.cpp new file mode 100644 index 0000000000..2b7b030a2f --- /dev/null +++ b/src/solver/tensorOp/Op2dTensorLite.cpp @@ -0,0 +1,193 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" + +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op2dTensorLite::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& alens = aTensorDesc.GetLengths(); + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 3) + { + size_t local_threads = 256; + int max_num_wg = 4096; + + // for naive tensor ops + size_t RD_BLCK = (clens[2] % 4 == 0) ? 4 : (clens[2] % 2 == 0) ? 
2 : 1; + size_t total_work = std::max(clens[2] / RD_BLCK, size_t(1)); + size_t grp_sz = (total_work + local_threads - 1) / local_threads; + + // opencl kernels are no longer supported, fallback to generic case + bool lite_applicable = grp_sz <= size_t(max_num_wg); + + bool is_lite = clens[0] == 1 && blens[0] == 1 && alens[0] == 1 && + (blens[1] == clens[1] || blens[1] == 1) && blens[2] == clens[2]; + + if(lite_applicable && is_lite) + { + return true; + } + } + + return false; +} + +std::size_t Op2dTensorLite::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution Op2dTensorLite::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + const size_t a_cstride = aTensorDesc.GetStrides()[1]; + const size_t b_cstride = bTensorDesc.GetStrides()[1]; + const size_t c_cstride = cTensorDesc.GetStrides()[1]; + + miopenDataType_t data_type = bTensorDesc.GetType(); + + auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens); + + int max_num_wg = 4096; + num_wg = num_wg > max_num_wg ? max_num_wg : num_wg; + + size_t local_threads = 256; + + // for naive tensor ops + auto&& [RD_BLCK, READ_TYPE] = GetRDBLCKandREADTYPE(clens[2], data_type); + + size_t total_work = std::max(clens[2] / RD_BLCK, size_t(1)); + size_t grp_sz = (total_work + local_threads - 1) / local_threads; + + grp_sz = std::min(size_t(max_num_wg), grp_sz); + size_t glb_sz = local_threads * grp_sz; + + size_t local_threads2 = 64; + size_t total_work2 = clens[1]; + size_t grp_sz2 = (total_work2 + local_threads2 - 1) / local_threads2; + grp_sz2 = std::min(size_t(max_num_wg / grp_sz), grp_sz2); + size_t glb_sz2 = local_threads2 * grp_sz2; + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{glb_sz, glb_sz2, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("USE_2D_TENSOR_LITE"); + build_params.Define("RD_BLCK", std::to_string(RD_BLCK)); + build_params.Define("READ_TYPE", READ_TYPE); + + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + kernel.kernel_file = "MIOpenTensorKernels.cl"; + kernel.kernel_name = "Op2dTensorLite"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = + [data_type, b_c = blens[1], a_cstride, b_cstride, c_cstride, total_work, total_work2]( + const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + kernel(params.ATensor, + static_cast(a_cstride), + params.BTensor, + static_cast(b_cstride), + params.CTensor, + static_cast(c_cstride), + 
miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(total_work), + static_cast(total_work2), + static_cast(!float_equal(miopen_beta, 0.0)), + static_cast(b_c == 1)); + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op2dTensorSquash.cpp b/src/solver/tensorOp/Op2dTensorSquash.cpp new file mode 100644 index 0000000000..d6ca7cfa3b --- /dev/null +++ b/src/solver/tensorOp/Op2dTensorSquash.cpp @@ -0,0 +1,175 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op2dTensorSquash::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& alens = aTensorDesc.GetLengths(); + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 3) + { + bool is_lite = clens[0] == 1 && blens[0] == 1 && alens[0] == 1 && + (blens[1] == clens[1] || blens[1] == 1) && blens[2] == clens[2]; + + bool is_squashed = + problem.GetNonStandardSquash() && !is_lite && + (blens[0] == 1 && clens[0] == 1 && clens[1] == 1 && blens[2] == clens[2]); + + if(is_squashed) + { + return true; + } + } + + return false; +} + +std::size_t Op2dTensorSquash::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +Op2dTensorSquash::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + const size_t b_nstride = bTensorDesc.GetStrides()[1]; + + miopenDataType_t data_type = bTensorDesc.GetType(); + + auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens); + + int max_num_wg = 4096; + num_wg = num_wg > max_num_wg ? 
max_num_wg : num_wg; + + size_t local_threads = 256; + + // for naive tensor ops + auto&& [RD_BLCK, READ_TYPE] = GetRDBLCKandREADTYPE(clens[2], data_type); + + size_t total_work = std::max(clens[2] / RD_BLCK, size_t(1)); + size_t grp_sz = (total_work + local_threads - 1) / local_threads; + + grp_sz = std::min(size_t(max_num_wg), grp_sz); + size_t glb_sz = local_threads * grp_sz; + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{glb_sz, 1, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("USE_2D_TENSOR_SQUASH"); + build_params.Define("RD_BLCK", std::to_string(RD_BLCK)); + build_params.Define("READ_TYPE", READ_TYPE); + + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + kernel.kernel_file = "MIOpenTensorKernels.cl"; + kernel.kernel_name = "Op2dTensorSquash"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = + [data_type, b_c = blens[1], b_nstride, total_work](const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + kernel(params.ATensor, + params.BTensor, + static_cast(b_c), + static_cast(b_nstride), + params.CTensor, + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(total_work), + static_cast(!float_equal(miopen_alpha0, 0.0)), + static_cast(!float_equal(miopen_alpha1, 0.0)), + static_cast(!float_equal(miopen_beta, 0.0))); + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op3dTensorGeneric.cpp b/src/solver/tensorOp/Op3dTensorGeneric.cpp new file mode 100644 index 0000000000..2bafc6abaa --- /dev/null +++ b/src/solver/tensorOp/Op3dTensorGeneric.cpp @@ -0,0 +1,164 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op3dTensorGeneric::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& alens = aTensorDesc.GetLengths(); + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 3) + { + return true; + } + + return false; +} + +std::size_t Op3dTensorGeneric::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +Op3dTensorGeneric::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + std::array astrides; + std::array bstrides; + std::array cstrides; + std::tie(astrides[0], astrides[1], astrides[2]) = miopen::tien<3>(aTensorDesc.GetStrides()); + std::tie(bstrides[0], bstrides[1], bstrides[2]) = miopen::tien<3>(bTensorDesc.GetStrides()); + std::tie(cstrides[0], cstrides[1], cstrides[2]) = miopen::tien<3>(cTensorDesc.GetStrides()); + + miopenDataType_t data_type = bTensorDesc.GetType(); + + auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens); + + int max_num_wg = 4096; + num_wg = num_wg > max_num_wg ? 
max_num_wg : num_wg; + + size_t local_threads = 256; + size_t global_threads = num_wg * local_threads; + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("USE_3D_TENSOR_GENERIC"); + + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + kernel.kernel_file = "MIOpenTensorKernelsHip.cpp"; + kernel.kernel_name = "Op3dTensorGeneric"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = + [data_type, blens, clens, astrides, bstrides, cstrides](const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(blens[1] == 1 ? clens[1] : blens[1]), // b_c, + static_cast(blens[2] == 1 ? clens[2] : blens[2]), // b_h, + static_cast(clens[1]), // c_c, + static_cast(clens[2]), // c_h, + static_cast(astrides[0]), // a_nstride, + static_cast(astrides[1]), // a_cstride, + static_cast(astrides[2]), // a_hstride, + static_cast(blens[0] == 1 ? 0 : bstrides[0]), // b_nstride, + static_cast(blens[1] == 1 ? 0 : bstrides[1]), // b_cstride, + static_cast(blens[2] == 1 ? 0 : bstrides[2]), // b_hstride, + static_cast(cstrides[0]), // c_nstride, + static_cast(cstrides[1]), // c_cstride, + static_cast(cstrides[2]), // c_hstride, + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(clens[0]), + !float_equal(miopen_beta, 0.0)); + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op4dTensorGeneric.cpp b/src/solver/tensorOp/Op4dTensorGeneric.cpp new file mode 100644 index 0000000000..3c67a3411f --- /dev/null +++ b/src/solver/tensorOp/Op4dTensorGeneric.cpp @@ -0,0 +1,170 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool Op4dTensorGeneric::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& alens = aTensorDesc.GetLengths(); + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 4) + { + return true; + } + + return false; +} + +std::size_t Op4dTensorGeneric::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +Op4dTensorGeneric::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + std::array blens; + std::array clens; + std::tie(blens[0], blens[1], blens[2], blens[3]) = miopen::tien<4>(bTensorDesc.GetLengths()); + std::tie(clens[0], clens[1], clens[2], clens[3]) = miopen::tien<4>(cTensorDesc.GetLengths()); + + std::array astrides; + std::array bstrides; + std::array cstrides; + std::tie(astrides[0], astrides[1], astrides[2], astrides[3]) = + miopen::tien<4>(aTensorDesc.GetStrides()); + std::tie(bstrides[0], bstrides[1], bstrides[2], bstrides[3]) = + miopen::tien<4>(bTensorDesc.GetStrides()); + std::tie(cstrides[0], cstrides[1], cstrides[2], cstrides[3]) = + miopen::tien<4>(cTensorDesc.GetStrides()); + + miopenDataType_t data_type = bTensorDesc.GetType(); + + int max_num_wg = 4096; + + auto&& [num_wg_orig, work_per_wg, incr_wg, bitmap, local_threads, global_threads] = + Get4dParams(problem, false); + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("USE_4D_TENSOR_GENERIC"); + build_params.Define("MAX_NUM_WG", std::to_string(max_num_wg)); + auto kernel = KernelInfo{}; + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + kernel.kernel_file = "MIOpenTensorKernels.cl"; + kernel.kernel_name = "Op4dTensorGeneric"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = + [data_type, blens, clens, astrides, bstrides, cstrides, work_per_wg, num_wg_orig, bitmap]( + const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = 
as_float(*(static_cast(params.beta))); + + kernel(params.ATensor, + static_cast(astrides[0]), // a_nstride, + static_cast(astrides[1]), // a_cstride, + static_cast(astrides[2]), // a_hstride, + params.BTensor, + static_cast(blens[1]), // b_c, + static_cast(blens[2]), // b_h, + static_cast(blens[3]), // b_w, + static_cast(bstrides[0]), // b_nstride, + static_cast(bstrides[1]), // b_cstride, + static_cast(bstrides[2]), // b_hstride, + params.CTensor, + static_cast(clens[1]), // c_c, + static_cast(clens[2]), // c_h, + static_cast(clens[3]), // c_w, + static_cast(cstrides[0]), // c_nstride, + static_cast(cstrides[1]), // c_cstride, + static_cast(cstrides[2]), // c_hstride, + miopen_alpha0, + miopen_alpha1, + miopen_beta, + bitmap, + work_per_wg, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(num_wg_orig)); + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/Op4dTensorLite.cpp b/src/solver/tensorOp/Op4dTensorLite.cpp new file mode 100644 index 0000000000..a53174507e --- /dev/null +++ b/src/solver/tensorOp/Op4dTensorLite.cpp @@ -0,0 +1,165 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ *******************************************************************************/
+#include "tensor_op_helpers.hpp"
+#include <miopen/tensorOp/solvers.hpp>
+#include <miopen/tensorOp/invoke_params.hpp>
+#include <miopen/tensor.hpp>
+#include <miopen/kernel_build_params.hpp>
+#include <miopen/float_equal.hpp>
+#include <miopen/visit_float.hpp>
+
+namespace miopen {
+
+namespace solver {
+
+namespace tensorOp {
+
+bool Op4dTensorLite::IsApplicable([[maybe_unused]] const ExecutionContext& context,
+                                  const miopen::tensorOp::ProblemDescription& problem) const
+{
+    const auto& aTensorDesc = problem.GetATensorDesc();
+    const auto& bTensorDesc = problem.GetBTensorDesc();
+    const auto& cTensorDesc = problem.GetCTensorDesc();
+
+    const auto& alens = aTensorDesc.GetLengths();
+    const auto& blens = bTensorDesc.GetLengths();
+    const auto& clens = cTensorDesc.GetLengths();
+
+    auto asize = alens.size();
+
+    if(aTensorDesc.GetType() == miopenDouble)
+    {
+        return false;
+    }
+
+    if(asize == 4)
+    {
+        auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens);
+
+        // quick fix for btensor = <1, 1, 1, 1>
+        if(bTensorDesc.GetElementSize() == 1)
+            bitmap = 4;
+
+        bool fwd_conv_bias = (bitmap == (1 << 2));
+
+        bool packed_tensor = true;
+        packed_tensor &= aTensorDesc.IsPacked();
+        packed_tensor &= bTensorDesc.IsPacked();
+        packed_tensor &= cTensorDesc.IsPacked();
+
+        bool packed_equal_tensor =
+            packed_tensor && (bTensorDesc.GetElementSize() == cTensorDesc.GetElementSize());
+
+        if(!fwd_conv_bias && packed_equal_tensor)
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+std::size_t Op4dTensorLite::GetWorkspaceSize(
+    [[maybe_unused]] const ExecutionContext& context,
+    [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const
+{
+    return 0;
+}
+
+ConvSolution Op4dTensorLite::GetSolution([[maybe_unused]] const ExecutionContext& context,
+                                         const miopen::tensorOp::ProblemDescription& problem) const
+{
+    auto result = ConvSolution{miopenStatusSuccess};
+
+    const auto& bTensorDesc = problem.GetBTensorDesc();
+    const auto& cTensorDesc = problem.GetCTensorDesc();
+
+    miopenDataType_t data_type = bTensorDesc.GetType();
+
+    auto&& [num_wg_orig, work_per_wg, incr_wg, bitmap, local_threads, global_threads] =
+        Get4dParams(problem, true);
+
+    auto&& [RD_BLCK, READ_TYPE] =
+        GetRDBLCKandREADTYPE(cTensorDesc.GetElementSize(), bTensorDesc.GetType());
+
+    size_t total_work = std::max(cTensorDesc.GetElementSize() / RD_BLCK, size_t(1));
+
+    const std::array<size_t, 3> vld{local_threads, 1, 1};
+    const std::array<size_t, 3> vgd{global_threads, 1, 1};
+
+    KernelBuildParameters build_params = KernelBuildParameters{};
+
+    GetCommonParams(build_params, problem, false);
+
+    build_params.Define("USE_4D_TENSOR_LITE");
+    build_params.Define("RD_BLCK", std::to_string(RD_BLCK));
+    build_params.Define("READ_TYPE", READ_TYPE);
+
+    auto kernel = KernelInfo{};
+
+    kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
+    kernel.kernel_file  = "MIOpenTensorKernels.cl";
+    kernel.kernel_name  = "Op4dTensorLite";
+
+    using std::begin, std::end;
+
+    kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld));
+    kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd));
+
+    result.invoker_factory = [data_type, total_work](const std::vector<Kernel> kernels) {
+        return [=](const Handle& handle_, const AnyInvokeParams& raw_params) {
+            decltype(auto) kernel = handle_.Run(kernels.front());
+            decltype(auto) params = raw_params.CastTo<miopen::tensorOp::InvokeParams>();
+
+            visit_float(data_type, [&](auto as_float) {
+                auto miopen_alpha0 = as_float(*(static_cast<const float*>(params.alpha0)));
+                auto miopen_alpha1 = as_float(*(static_cast<const float*>(params.alpha1)));
+                auto miopen_beta   = as_float(*(static_cast<const float*>(params.beta)));
+
+                kernel(params.ATensor,
+                       params.BTensor,
+                       params.CTensor,
+                       miopen_alpha0,
+                       miopen_alpha1,
+                       miopen_beta,
+                       static_cast<int64_t>(params.Aoffset),
+                       static_cast<int64_t>(params.Boffset),
+                       static_cast<int64_t>(params.Coffset),
+                       static_cast<int64_t>(total_work),
+                       static_cast<int>(!float_equal(miopen_beta, 0.0)));
+            });
+        };
+    };
+    result.construction_params.push_back(kernel);
+
+    return result;
+}
+
+} // namespace tensorOp
+
+} // namespace solver
+
+} // namespace miopen
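For reference, the "lite" path above relies on the vector-read selection provided by GetRDBLCKandREADTYPE in tensor_op_helpers.hpp. The following self-contained sketch (not part of the change; function and variable names are illustrative only) mirrors that logic: pick the widest block (4, 2, or 1) that divides the element count and derive the OpenCL read type from the base type name.

    #include <cstddef>
    #include <iostream>
    #include <string>
    #include <tuple>

    // Pick the widest vector width that divides the element count and build the
    // matching OpenCL read type string, mirroring the helper used by the solver.
    std::tuple<std::size_t, std::string> PickReadBlock(std::size_t len, const std::string& data_type)
    {
        const std::size_t rd_blck = (len % 4 == 0) ? 4 : (len % 2 == 0) ? 2 : 1;
        const std::string read_type =
            (rd_blck == 1) ? data_type : data_type + std::to_string(rd_blck);
        return {rd_blck, read_type};
    }

    int main()
    {
        for(std::size_t len : {1024, 514, 513})
        {
            auto [rd_blck, read_type] = PickReadBlock(len, "float");
            // e.g. 1024 -> float4, 514 -> float2, 513 -> float
            std::cout << len << " -> RD_BLCK=" << rd_blck << ", READ_TYPE=" << read_type << '\n';
        }
        return 0;
    }

The same quantity also determines total_work above, since each work-item then processes RD_BLCK packed elements.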
diff --git a/src/solver/tensorOp/Op5dTensorGeneric.cpp b/src/solver/tensorOp/Op5dTensorGeneric.cpp
new file mode 100644
index 0000000000..35ef705f5b
--- /dev/null
+++ b/src/solver/tensorOp/Op5dTensorGeneric.cpp
@@ -0,0 +1,179 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include "tensor_op_helpers.hpp"
+#include <miopen/tensorOp/solvers.hpp>
+#include <miopen/tensorOp/invoke_params.hpp>
+#include <miopen/tensor.hpp>
+#include <miopen/kernel_build_params.hpp>
+#include <miopen/float_equal.hpp>
+#include <miopen/visit_float.hpp>
+
+namespace miopen {
+
+namespace solver {
+
+namespace tensorOp {
+
+bool Op5dTensorGeneric::IsApplicable([[maybe_unused]] const ExecutionContext& context,
+                                     const miopen::tensorOp::ProblemDescription& problem) const
+{
+    const auto& aTensorDesc = problem.GetATensorDesc();
+    const auto& alens       = aTensorDesc.GetLengths();
+    auto asize              = alens.size();
+
+    if(aTensorDesc.GetType() == miopenDouble)
+    {
+        return false;
+    }
+
+    if(asize == 5)
+    {
+        return true;
+    }
+
+    return false;
+}
+
+std::size_t Op5dTensorGeneric::GetWorkspaceSize(
+    [[maybe_unused]] const ExecutionContext& context,
+    [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const
+{
+    return 0;
+}
+
+ConvSolution
+Op5dTensorGeneric::GetSolution([[maybe_unused]] const ExecutionContext& context,
+                               const miopen::tensorOp::ProblemDescription& problem) const
+{
+    auto result = ConvSolution{miopenStatusSuccess};
+
+    const auto& aTensorDesc = problem.GetATensorDesc();
+    const auto& bTensorDesc = problem.GetBTensorDesc();
+    const auto& cTensorDesc = problem.GetCTensorDesc();
+
+    const auto& blens = bTensorDesc.GetLengths();
+    const auto& clens = cTensorDesc.GetLengths();
+
+    std::array<size_t, 5> astrides;
+    std::array<size_t, 5> bstrides;
+    std::array<size_t, 5> cstrides;
+    std::tie(astrides[0], astrides[1], astrides[2], astrides[3], astrides[4]) =
+        miopen::tien<5>(aTensorDesc.GetStrides());
+    std::tie(bstrides[0], bstrides[1], bstrides[2], bstrides[3], bstrides[4]) =
+        miopen::tien<5>(bTensorDesc.GetStrides());
+    std::tie(cstrides[0], cstrides[1], cstrides[2], cstrides[3], cstrides[4]) =
+        miopen::tien<5>(cTensorDesc.GetStrides());
+
+    miopenDataType_t data_type = bTensorDesc.GetType();
+
+    auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens);
+
+    int num_wg_orig = num_wg;
+    int max_num_wg  = 4096;
+    num_wg          = num_wg > max_num_wg ? max_num_wg : num_wg;
+
+    size_t local_threads  = 256;
+    size_t global_threads = num_wg * local_threads;
+
+    const std::array<size_t, 3> vld{local_threads, 1, 1};
+    const std::array<size_t, 3> vgd{global_threads, 1, 1};
+
+    KernelBuildParameters build_params = KernelBuildParameters{};
+
+    GetCommonParams(build_params, problem, false);
+
+    build_params.Define("USE_5D_TENSOR_GENERIC");
+    build_params.Define("MAX_NUM_WG", std::to_string(max_num_wg));
+
+    auto kernel = KernelInfo{};
+
+    kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
+    kernel.kernel_file  = "MIOpenTensorKernels.cl";
+    kernel.kernel_name  = "Op5dTensorGeneric";
+
+    using std::begin, std::end;
+
+    kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld));
+    kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd));
+
+    result.invoker_factory =
+        [data_type, blens, clens, astrides, bstrides, cstrides, bitmap, work_per_wg, num_wg_orig](
+            const std::vector<Kernel> kernels) {
+            return [=](const Handle& handle_, const AnyInvokeParams& raw_params) {
+                decltype(auto) kernel = handle_.Run(kernels.front());
+                decltype(auto) params = raw_params.CastTo<miopen::tensorOp::InvokeParams>();
+
+                visit_float(data_type, [&](auto as_float) {
+                    auto miopen_alpha0 = as_float(*(static_cast<const float*>(params.alpha0)));
+                    auto miopen_alpha1 = as_float(*(static_cast<const float*>(params.alpha1)));
+                    auto miopen_beta   = as_float(*(static_cast<const float*>(params.beta)));
+
+                    kernel(params.ATensor,
+                           static_cast<int>(astrides[0]),
+                           static_cast<int>(astrides[1]),
+                           static_cast<int>(astrides[2]),
+                           static_cast<int>(astrides[3]),
+                           params.BTensor,
+                           static_cast<int>(blens[1]),    // b_c,
+                           static_cast<int>(blens[2]),    // b_d,
+                           static_cast<int>(blens[3]),    // b_h,
+                           static_cast<int>(blens[4]),    // b_w,
+                           static_cast<int>(bstrides[0]), // b_nstride,
+                           static_cast<int>(bstrides[1]), // b_cstride,
+                           static_cast<int>(bstrides[2]), // b_dstride,
+                           static_cast<int>(bstrides[3]), // b_hstride,
+                           params.CTensor,
+                           static_cast<int>(clens[1]),    // c_c,
+                           static_cast<int>(clens[2]),    // c_d,
+                           static_cast<int>(clens[3]),    // c_h,
+                           static_cast<int>(clens[4]),    // c_w,
+                           static_cast<int>(cstrides[0]), // c_nstride,
+                           static_cast<int>(cstrides[1]), // c_cstride,
+                           static_cast<int>(cstrides[2]), // c_dstride,
+                           static_cast<int>(cstrides[3]), // c_hstride,
+                           miopen_alpha0,
+                           miopen_alpha1,
+                           miopen_beta,
+                           bitmap,
+                           work_per_wg,
+                           static_cast<int64_t>(params.Aoffset),
+                           static_cast<int64_t>(params.Boffset),
+                           static_cast<int64_t>(params.Coffset),
+                           static_cast<int>(num_wg_orig));
+                });
+            };
+        };
+    result.construction_params.push_back(kernel);
+
+    return result;
+}
+
+} // namespace tensorOp
+
+} // namespace solver
+
+} // namespace miopen
diff --git a/src/solver/tensorOp/OpTensorFwdBias.cpp b/src/solver/tensorOp/OpTensorFwdBias.cpp
new file mode 100644
index 0000000000..9df036df8c
--- /dev/null
+++ b/src/solver/tensorOp/OpTensorFwdBias.cpp
@@ -0,0 +1,224 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
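All of the generic solvers key their applicability and launch geometry off the broadcast bitmap produced by GetBitmapAndWgInfo (defined later in tensor_op_helpers.hpp). The self-contained sketch below mirrors that computation, omitting the zero-length quick fix; the names and the example shapes are illustrative only. It shows how a bias-shaped B tensor yields exactly the (1 << 2) pattern the solvers test for as "forward conv bias".

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Bit (rank - 1 - i) of the bitmap is set when B's dimension i is not
    // broadcast; broadcast dimensions fold into the per-work-group work instead.
    struct WgInfo
    {
        int num_wg;
        int work_per_wg;
        unsigned int bitmap;
    };

    WgInfo ComputeWgInfo(const std::vector<std::size_t>& blens, const std::vector<std::size_t>& clens)
    {
        auto first_not_one =
            std::find_if(blens.rbegin(), blens.rend(), [](std::size_t v) { return v != 1; });
        auto d = std::distance(blens.begin(), first_not_one.base());

        int num_wg = first_not_one != blens.rend() ? static_cast<int>(*first_not_one) : 1;
        int work_per_wg =
            std::accumulate(clens.begin() + d, clens.end(), 1, std::multiplies<int>());

        unsigned int bitmap = 1u << (blens.size() - d);
        for(int i = static_cast<int>(d) - 2; i >= 0; i--)
        {
            if(blens[i] != 1)
            {
                bitmap |= 1u << (blens.size() - (i + 1));
                num_wg *= static_cast<int>(blens[i]);
            }
            else
            {
                work_per_wg *= static_cast<int>(clens[i]);
            }
        }
        return {num_wg, work_per_wg, bitmap};
    }

    int main()
    {
        // Bias-like B broadcast over N, H, W: only the C dimension is "real".
        auto info = ComputeWgInfo({1, 8, 1, 1}, {16, 8, 32, 32});
        // Expected: num_wg = 8, work_per_wg = 16 * 32 * 32 = 16384, bitmap = 0b0100,
        // i.e. exactly the (1 << 2) pattern treated as forward conv bias.
        std::cout << info.num_wg << ' ' << info.work_per_wg << ' ' << info.bitmap << '\n';
        return 0;
    }

Get4dParams then rebalances precisely this situation: when there are few work-groups each doing a lot of work (num_wg < 640 and work_per_wg > 256), it folds the batch dimension into the grid and records incr_wg = 1, which is what OpTensorFwdBias passes through to the kernel.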
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool OpTensorFwdBias::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& alens = aTensorDesc.GetLengths(); + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 4) + { + auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens); + + // quick fix for btensor = <1, 1, 1, 1> + if(bTensorDesc.GetElementSize() == 1) + bitmap = 4; + + bool fwd_conv_bias = (bitmap == (1 << 2)); + + if(fwd_conv_bias) + { + return true; + } + } + return false; +} + +std::size_t OpTensorFwdBias::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution OpTensorFwdBias::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + std::array blens; + std::array clens; + std::tie(blens[0], blens[1], blens[2], blens[3]) = miopen::tien<4>(bTensorDesc.GetLengths()); + std::tie(clens[0], clens[1], clens[2], clens[3]) = miopen::tien<4>(cTensorDesc.GetLengths()); + + std::array astrides; + std::array bstrides; + std::array cstrides; + std::tie(astrides[0], astrides[1], astrides[2], astrides[3]) = + miopen::tien<4>(aTensorDesc.GetStrides()); + std::tie(bstrides[0], bstrides[1], bstrides[2], bstrides[3]) = + miopen::tien<4>(bTensorDesc.GetStrides()); + std::tie(cstrides[0], cstrides[1], cstrides[2], cstrides[3]) = + miopen::tien<4>(cTensorDesc.GetStrides()); + + miopenDataType_t data_type = bTensorDesc.GetType(); + + int max_num_wg = 4096; + + auto&& [num_wg_orig, work_per_wg, incr_wg, bitmap, local_threads, global_threads] = + Get4dParams(problem, false); + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + bool packed_tensor = true; + packed_tensor &= aTensorDesc.IsPacked(); + packed_tensor &= bTensorDesc.IsPacked(); + packed_tensor &= cTensorDesc.IsPacked(); + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("MAX_NUM_WG", std::to_string(max_num_wg)); + + auto kernel = KernelInfo{}; + + if(packed_tensor) + { + 
build_params.Define("USE_FWD_BIAS"); + kernel.kernel_name = "OpTensorFwdBias"; + } + else + { + build_params.Define("USE_FWD_BIAS_GENERIC"); + kernel.kernel_name = "OpTensorFwdBiasGeneric"; + } + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + kernel.kernel_file = "MIOpenTensorKernels.cl"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = [data_type, + blens, + clens, + astrides, + bstrides, + cstrides, + work_per_wg, + num_wg_orig, + incr_wg, + packed_tensor](const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + if(packed_tensor) + { // OpTensorFwdBias + kernel(params.ATensor, + params.BTensor, + static_cast(blens[1]), + params.CTensor, + static_cast(clens[0]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + work_per_wg, + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(num_wg_orig), + static_cast(incr_wg)); + } + else + { // OpTensorFwdBiasGeneric + kernel(params.ATensor, + static_cast(astrides[0]), + static_cast(astrides[1]), + static_cast(astrides[2]), + params.BTensor, + static_cast(blens[1]), + static_cast(bstrides[1]), + params.CTensor, + static_cast(clens[0]), + static_cast(clens[3]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + static_cast(cstrides[2]), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + work_per_wg, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(num_wg_orig), + static_cast(incr_wg)); + } + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/OpTensorLeadingOnes.cpp b/src/solver/tensorOp/OpTensorLeadingOnes.cpp new file mode 100644 index 0000000000..d930da0da6 --- /dev/null +++ b/src/solver/tensorOp/OpTensorLeadingOnes.cpp @@ -0,0 +1,241 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "tensor_op_helpers.hpp" +#include +#include +#include +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +bool OpTensorLeadingOnes::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& alens = aTensorDesc.GetLengths(); + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + auto asize = alens.size(); + + if(aTensorDesc.GetType() == miopenDouble) + { + return false; + } + + if(asize == 4) + { + + auto&& [num_wg, work_per_wg, bitmap] = GetBitmapAndWgInfo(blens, clens); + + // quick fix for btensor = <1, 1, 1, 1> + if(bTensorDesc.GetElementSize() == 1) + bitmap = 4; + + bool fwd_conv_bias = (bitmap == (1 << 2)); + + bool packed_tensor = true; + packed_tensor &= aTensorDesc.IsPacked(); + packed_tensor &= bTensorDesc.IsPacked(); + packed_tensor &= cTensorDesc.IsPacked(); + + bool packed_equal_tensor = + packed_tensor && (bTensorDesc.GetElementSize() == cTensorDesc.GetElementSize()); + + // first_not_one is incorrect if btensor size equal to 1 + auto first_not_one = + std::find_if(blens.rbegin(), blens.rend(), [](int i) { return i != 1; }); + auto d = std::distance(blens.begin(), first_not_one.base()); + + bool leading_ones = IsBitmapLeadingOnes(bitmap, clens.size(), static_cast(d - 2)); + + if(!fwd_conv_bias && !packed_equal_tensor && leading_ones) + { + return true; + } + } + + return false; +} + +std::size_t OpTensorLeadingOnes::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::tensorOp::ProblemDescription& problem) const +{ + return 0; +} + +ConvSolution +OpTensorLeadingOnes::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::tensorOp::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + const auto& aTensorDesc = problem.GetATensorDesc(); + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + std::array clens; + std::tie(clens[0], clens[1], clens[2], clens[3]) = miopen::tien<4>(cTensorDesc.GetLengths()); + + std::array astrides; + std::array bstrides; + std::array cstrides; + std::tie(astrides[0], astrides[1], astrides[2], astrides[3]) = + miopen::tien<4>(aTensorDesc.GetStrides()); + std::tie(bstrides[0], bstrides[1], bstrides[2], bstrides[3]) = + miopen::tien<4>(bTensorDesc.GetStrides()); + std::tie(cstrides[0], cstrides[1], cstrides[2], cstrides[3]) = + miopen::tien<4>(cTensorDesc.GetStrides()); + + miopenDataType_t data_type = bTensorDesc.GetType(); + + int max_num_wg = 4096; + + auto&& [num_wg_orig, work_per_wg, incr_wg, bitmap, local_threads, global_threads] = + Get4dParams(problem, false); + + const std::array vld{local_threads, 1, 1}; + const std::array vgd{global_threads, 1, 1}; + + bool packed_tensor = true; + packed_tensor &= aTensorDesc.IsPacked(); + packed_tensor &= bTensorDesc.IsPacked(); + 
packed_tensor &= cTensorDesc.IsPacked(); + + KernelBuildParameters build_params = KernelBuildParameters{}; + + GetCommonParams(build_params, problem, false); + + build_params.Define("MAX_NUM_WG", std::to_string(max_num_wg)); + auto kernel = KernelInfo{}; + + if(packed_tensor) + { + build_params.Define("USE_LEADING_ONES"); + kernel.kernel_name = "OpTensorLeadingOnes"; + } + else + { + build_params.Define("USE_LEADING_ONES_GENERIC"); + kernel.kernel_name = "OpTensorLeadingOnesGeneric"; + } + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); + kernel.kernel_file = "MIOpenTensorKernels.cl"; + + using std::begin, std::end; + + kernel.l_wk.insert(end(kernel.l_wk), begin(vld), end(vld)); + kernel.g_wk.insert(end(kernel.g_wk), begin(vgd), end(vgd)); + + result.invoker_factory = [data_type, + clens, + astrides, + bstrides, + cstrides, + work_per_wg, + num_wg_orig, + bitmap, + packed_tensor](const std::vector kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels.front()); + decltype(auto) params = raw_params.CastTo(); + + visit_float(data_type, [&](auto as_float) { + auto miopen_alpha0 = as_float(*(static_cast(params.alpha0))); + auto miopen_alpha1 = as_float(*(static_cast(params.alpha1))); + auto miopen_beta = as_float(*(static_cast(params.beta))); + + if(packed_tensor) + { // OpTensorLeadingOnes + kernel(params.ATensor, + params.BTensor, + params.CTensor, + static_cast(clens[1]), + static_cast(clens[2]), + static_cast(clens[3]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + work_per_wg, + miopen_alpha0, + miopen_alpha1, + miopen_beta, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(num_wg_orig), + bitmap); + } + else + { // OpTensorLeadingOnesGeneric + kernel(params.ATensor, + static_cast(astrides[0]), + static_cast(astrides[1]), + static_cast(astrides[2]), + params.BTensor, + static_cast(bstrides[0]), + static_cast(bstrides[1]), + static_cast(bstrides[2]), + params.CTensor, + static_cast(clens[1]), + static_cast(clens[2]), + static_cast(clens[3]), + static_cast(cstrides[0]), + static_cast(cstrides[1]), + static_cast(cstrides[2]), + miopen_alpha0, + miopen_alpha1, + miopen_beta, + work_per_wg, + static_cast(params.Aoffset), + static_cast(params.Boffset), + static_cast(params.Coffset), + static_cast(num_wg_orig), + bitmap); + } + }); + }; + }; + result.construction_params.push_back(kernel); + + return result; +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/solver/tensorOp/tensor_op_helpers.hpp b/src/solver/tensorOp/tensor_op_helpers.hpp new file mode 100644 index 0000000000..46ce39e4a0 --- /dev/null +++ b/src/solver/tensorOp/tensor_op_helpers.hpp @@ -0,0 +1,218 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
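OpTensorLeadingOnes additionally requires that every leading dimension of B, up to its last non-broadcast one, be marked in the bitmap. A small standalone sketch of that test, mirroring IsBitmapLeadingOnes from the helper header below; the example values are illustrative only:

    #include <iostream>

    // Bit (n_size - 1 - i) corresponds to dimension i; "leading ones" means all
    // dimensions 0..first_not_one are non-broadcast.
    bool BitmapHasLeadingOnes(unsigned int bitmap, int n_size, int first_not_one)
    {
        bool leading_ones = true;
        for(int i = first_not_one; i >= 0; i--)
        {
            leading_ones &= (bitmap & (1u << (n_size - 1 - i))) != 0u;
        }
        return leading_ones;
    }

    int main()
    {
        // 4-d case: bits 3..0 map to dimensions 0..3.
        std::cout << BitmapHasLeadingOnes(0b1100, 4, 1) << '\n'; // dims 0 and 1 set -> 1
        std::cout << BitmapHasLeadingOnes(0b0100, 4, 1) << '\n'; // dim 0 missing   -> 0
        return 0;
    }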
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include +#include +#include + +#include + +namespace miopen { + +namespace solver { + +namespace tensorOp { + +inline void GetCommonParams(KernelBuildParameters& build_params, + const miopen::tensorOp::ProblemDescription& problem, + bool is64bSupported) +{ + build_params.Define("MIOPEN_TYPE", miopen::GetDataType(problem.GetBTensorDesc().GetType())); + + switch(problem.GetTensorOp()) + { + case 0: build_params.Define("MIOPEN_TENSOR_OP", "miopenAdd"); break; + case 1: build_params.Define("MIOPEN_TENSOR_OP", "miopenMul"); break; + case 2: build_params.Define("MIOPEN_TENSOR_OP", "miopenMin"); break; + case 3: build_params.Define("MIOPEN_TENSOR_OP", "miopenMax"); break; + } + + if(is64bSupported && problem.GetATensorDesc().AllDimsFitIntoInt()) + { + build_params.Define("DIM_TYPE", "uint32_t"); + } + else + { + build_params.Define("DIM_TYPE", "uint64_t"); + } +} + +inline std::tuple GetRDBLCKandREADTYPE(size_t len, miopenDataType_t type) +{ + const std::string data_type = GetDataType(type); + size_t RD_BLCK = (len % 4 == 0) ? 4 : (len % 2 == 0) ? 2 : 1; + return std::make_tuple(RD_BLCK, + (RD_BLCK == 1) ? data_type : data_type + std::to_string(RD_BLCK)); +} + +inline std::tuple GetBitmapAndWgInfo(const std::vector& blens, + const std::vector& clens) +{ + // first_not_one is incorrect if btensor size equal to 1 + auto first_not_one = std::find_if(blens.rbegin(), blens.rend(), [](int i) { return i != 1; }); + auto d = std::distance(blens.begin(), first_not_one.base()); + + // quick fix + int num_wg = first_not_one != blens.rend() + ? static_cast(*first_not_one == 0 ? 
1 : *first_not_one) + : 1; + + int work_per_wg = std::accumulate(clens.begin() + d, clens.end(), 1, std::multiplies()); + + unsigned int bitmap = 0; + // update bitmap for first_not_one + bitmap |= (1 << (blens.size() - d)); + + for(int i = (d - 2); i >= 0; i--) + { + if(blens[i] != 1) + { + bitmap |= (1 << (blens.size() - (i + 1))); + num_wg *= blens[i]; + } + else + { + work_per_wg *= clens[i]; + } + } + + return std::make_tuple(num_wg, work_per_wg, bitmap); +} + +inline bool IsBitmapLeadingOnes(unsigned int bitmap, int n_size, int first_not_one) +{ + bool leading_ones = true; + for(int i = first_not_one; i >= 0; i--) + { + bool is_one = (bitmap & (1 << (n_size - 1 - i))) != 0u; + leading_ones &= is_one; + } + return leading_ones; +} + +inline std::tuple +Get4dParams(const miopen::tensorOp::ProblemDescription& problem, bool is4dLite) +{ + const auto& bTensorDesc = problem.GetBTensorDesc(); + const auto& cTensorDesc = problem.GetCTensorDesc(); + + const auto& blens = bTensorDesc.GetLengths(); + const auto& clens = cTensorDesc.GetLengths(); + + auto dims = clens.size(); + + // first_not_one is incorrect if btensor size equal to 1 + auto first_not_one = std::find_if(blens.rbegin(), blens.rend(), [](int i) { return i != 1; }); + auto d = std::distance(blens.begin(), first_not_one.base()); + + // quick fix + int num_wg = first_not_one != blens.rend() + ? static_cast(*first_not_one == 0 ? 1 : *first_not_one) + : 1; + + int work_per_wg = std::accumulate(clens.begin() + d, clens.end(), 1, std::multiplies()); + + unsigned int bitmap = 0; + // update bitmap for first_not_one + bitmap |= (1 << (blens.size() - d)); + + for(int i = (d - 2); i >= 0; i--) + { + if(blens[i] != 1) + { + bitmap |= (1 << (blens.size() - (i + 1))); + num_wg *= blens[i]; + } + else + { + work_per_wg *= clens[i]; + } + } + + // quick fix for btensor = <1, 1, 1, 1> + if(bTensorDesc.GetElementSize() == 1) + bitmap = 4; + + int incr_wg = 0; + // Forward Convolution Bias specialization + // for fwd-bias, bitmap looks like <0, 1, 0, 0> + // Is the no. of work-groups and the work for each wg balanced? + auto fwd_conv_bias = bitmap == (1 << 2) ? 1 : 0; + // This block gives off indexing for 5d tensors, skipping + if(fwd_conv_bias == 1 && dims < 5 && num_wg < 640 && work_per_wg > 256 && clens[0] > 0) + { // 640 workgroups of size 256 needed to completely fill the GPU + + work_per_wg /= clens[0]; // c_n; + num_wg *= clens[0]; // c_n; + incr_wg = 1; + } + + int num_wg_orig = num_wg; + int max_num_wg = 4096; + num_wg = num_wg > max_num_wg ? max_num_wg : num_wg; + + size_t local_threads = 256; + + bool leading_ones = IsBitmapLeadingOnes(bitmap, clens.size(), static_cast(d - 2)); + + if(leading_ones && work_per_wg < 64) + { + local_threads = 64; + } + + // Special case for adding tensors in place + size_t global_threads = + (static_cast(leading_ones) == 1 && (d - 1) == 3) ? num_wg : num_wg * local_threads; + global_threads = (global_threads < local_threads) ? local_threads : global_threads; + + if(is4dLite) + { + // for naive tensor ops + const std::string data_type = GetDataType(bTensorDesc.GetType()); + + size_t TENS_LEN = cTensorDesc.GetElementSize(); + size_t RD_BLCK = (TENS_LEN % 4 == 0) ? 4 : (TENS_LEN % 2 == 0) ? 2 : 1; + const std::string READ_TYPE = + (RD_BLCK == 1) ? 
data_type : data_type + std::to_string(RD_BLCK); + + size_t total_work = std::max(TENS_LEN / RD_BLCK, size_t(1)); + size_t grp_sz = (total_work + local_threads - 1) / local_threads; + grp_sz = std::min(size_t(max_num_wg), grp_sz); + size_t glb_sz = local_threads * grp_sz; + + global_threads = glb_sz; + } + + return std::make_tuple( + num_wg_orig, work_per_wg, incr_wg, bitmap, local_threads, global_threads); +} + +} // namespace tensorOp + +} // namespace solver + +} // namespace miopen diff --git a/src/tensor.cpp b/src/tensor.cpp index 3e5190bc25..c1bd709267 100644 --- a/src/tensor.cpp +++ b/src/tensor.cpp @@ -28,6 +28,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -868,6 +872,57 @@ void from_json(const nlohmann::json& j, TensorDescriptor& descriptor) j.at("type").get_to(descriptor.type); } +void OpTensor2(Handle& handle, + miopenTensorOp_t tensorOp, + const void* alpha0, + const TensorDescriptor& aTensorDesc, + ConstData_t ATensor, + const void* alpha1, + const TensorDescriptor& bTensorDesc, + ConstData_t BTensor, + const void* beta, + const TensorDescriptor& cTensorDesc, + Data_t CTensor, + const size_t Aoffset, + const size_t Boffset, + const size_t Coffset, + bool nonStandardSquash) +{ + if(ATensor == nullptr || BTensor == nullptr || CTensor == nullptr) + { + MIOPEN_THROW(miopenStatusBadParm); + } + + if(alpha0 == nullptr) + { + MIOPEN_THROW(miopenStatusBadParm, "Alpha0 value is nullptr"); + } + + if(alpha1 == nullptr) + { + MIOPEN_THROW(miopenStatusBadParm, "Alpha1 value is nullptr"); + } + + const auto problem = tensorOp::ProblemDescription{ + tensorOp, beta, aTensorDesc, bTensorDesc, cTensorDesc, nonStandardSquash}; + + const auto invoke_params = tensorOp::InvokeParams{ + alpha0, ATensor, alpha1, BTensor, beta, CTensor, Aoffset, Boffset, Coffset}; + + const auto algo = AlgorithmName{"TensorOpSolver"}; + const auto solvers = solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{} + + solver::SolverContainer{}; + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); +} + } // namespace miopen int miopenGetTensorIndex(miopenTensorDescriptor_t tensorDesc, std::initializer_list indices) diff --git a/src/tensorOp/problem_description.cpp b/src/tensorOp/problem_description.cpp new file mode 100644 index 0000000000..6053e7f1a0 --- /dev/null +++ b/src/tensorOp/problem_description.cpp @@ -0,0 +1,76 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
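The OpTensor2 entry point added to tensor.cpp above keeps the argument order of the existing OpTensor call. A hypothetical call site is sketched below; the include paths, the miopen::ConstData_t/Data_t qualifications, and the assumption that the caller already owns valid descriptors and device allocations are mine, not part of the change.

    // Assumed headers; OpTensor2 is expected to be declared alongside OpTensor.
    #include <miopen/handle.hpp>
    #include <miopen/tensor.hpp>
    #include <miopen/tensor_ops.hpp>

    // With beta = 0 the previous contents of C are ignored, so this computes
    // C = alpha0 * A + alpha1 * B element-wise.
    void AddTensors(miopen::Handle& handle,
                    const miopen::TensorDescriptor& aDesc, miopen::ConstData_t a,
                    const miopen::TensorDescriptor& bDesc, miopen::ConstData_t b,
                    const miopen::TensorDescriptor& cDesc, miopen::Data_t c)
    {
        const float alpha0 = 1.0f;
        const float alpha1 = 1.0f;
        const float beta   = 0.0f;

        miopen::OpTensor2(handle,
                          miopenTensorOpAdd,
                          &alpha0, aDesc, a,
                          &alpha1, bDesc, b,
                          &beta, cDesc, c,
                          0, 0, 0, // A/B/C element offsets
                          false);  // keep the standard squash behaviour
    }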
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/tensorOp/problem_description.hpp>
+#include <miopen/names.hpp>
+#include <miopen/float_equal.hpp>
+
+namespace miopen {
+
+namespace tensorOp {
+
+NetworkConfig ProblemDescription::MakeNetworkConfig() const
+{
+    std::string ss;
+
+    const auto& alens = aTensorDesc.GetLengths();
+    const auto& blens = bTensorDesc.GetLengths();
+
+    const auto& astrides = aTensorDesc.GetStrides();
+    const auto& bstrides = bTensorDesc.GetStrides();
+    const auto& cstrides = cTensorDesc.GetStrides();
+
+    auto printDims = [&ss, dims = alens.size() - 1](const auto& dim) {
+        for(uint32_t i = 0; i < dims; i++)
+        {
+            ss.append(std::to_string(dim[i]));
+            ss += 'x';
+        }
+        ss += std::to_string(dim.back());
+        ss += '-';
+    };
+
+    ss.reserve(1024);
+    ss.append(std::string_view("TensorOp-"));
+    ss += std::to_string(aTensorDesc.GetType());
+    ss += '-';
+    ss += std::to_string(tensorOp);
+    ss += '-';
+
+    printDims(alens);
+    printDims(blens);
+    printDims(astrides);
+    printDims(bstrides);
+    printDims(cstrides);
+
+    ss += (float_equal(beta, 0.0f) ? '1' : '0');
+
+    return NetworkConfig(std::move(ss));
+}
+
+} // namespace tensorOp
+
+} // namespace miopen
diff --git a/test/tensor_ops.cpp b/test/tensor_ops.cpp
index 3121715e8a..1df83044b2 100644
--- a/test/tensor_ops.cpp
+++ b/test/tensor_ops.cpp
@@ -181,24 +181,24 @@ struct verify_tensor_ops
         auto a_dev = handle.Write(a.data);
         auto b_dev = handle.Write(b.data);
 
-        miopen::OpTensor(handle,
-                         // miopenTensorOpAdd,
-                         // miopenTensorOpMax,
-                         // miopenTensorOpMin,
-                         miopenTensorOpMul,
-                         &alpha0,
-                         a.desc,
-                         a_dev.get(),
-                         &alpha1,
-                         b.desc,
-                         b_dev.get(),
-                         &beta,
-                         c.desc,
-                         c_dev.get(),
-                         Aoffset,
-                         Boffset,
-                         Coffset,
-                         false); // it does not verify non-standard behaviour
+        miopen::OpTensor2(handle,
+                          // miopenTensorOpAdd,
+                          // miopenTensorOpMax,
+                          // miopenTensorOpMin,
+                          miopenTensorOpMul,
+                          &alpha0,
+                          a.desc,
+                          a_dev.get(),
+                          &alpha1,
+                          b.desc,
+                          b_dev.get(),
+                          &beta,
+                          c.desc,
+                          c_dev.get(),
+                          Aoffset,
+                          Boffset,
+                          Coffset,
+                          false); // it does not verify non-standard behaviour
 
         if(not no_validate)
         {