From 4d2653e8390adcbc6fe56e004f5228b77bf8311a Mon Sep 17 00:00:00 2001
From: Tingyu <49021685+TingyuZhangSie@users.noreply.github.com>
Date: Mon, 16 Oct 2023 10:41:11 +0800
Subject: [PATCH] [Feature](bangc-ops): add transform binary operator. (#839)

Co-authored-by: zhangtingyu
---
 bangc-ops/kernels/kernel_wrapper/wrapper.h    |  12 +-
 bangc-ops/kernels/transform/transform.cpp     |  40 ++++
 bangc-ops/mlu_op.h                            |  95 +++++++++
 .../zoo/transform/test_case/case_0.prototxt   |  49 +++++
 .../zoo/transform/test_case/case_1.prototxt   |  41 ++++
 .../zoo/transform/test_case/complex.prototxt  |  37 ++++
 .../zoo/transform/test_case/half_0.prototxt   |  37 ++++
 .../zoo/transform/test_case/int32_0.prototxt  |  37 ++++
 .../pb_gtest/src/zoo/transform/transform.cpp  | 194 ++++++++++++++++++
 .../pb_gtest/src/zoo/transform/transform.h    |  31 +++
 .../user_guide/9_operators/index.rst          |  14 ++
 11 files changed, 586 insertions(+), 1 deletion(-)
 create mode 100644 bangc-ops/kernels/transform/transform.cpp
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h

diff --git a/bangc-ops/kernels/kernel_wrapper/wrapper.h b/bangc-ops/kernels/kernel_wrapper/wrapper.h
index 30063c935..10eea30cb 100644
--- a/bangc-ops/kernels/kernel_wrapper/wrapper.h
+++ b/bangc-ops/kernels/kernel_wrapper/wrapper.h
@@ -276,6 +276,16 @@
     const mluOpTensorDescriptor_t, const void *,          \
     const mluOpTensorDescriptor_t, void *diff_x
 
+#define TRANSFORM_PARAM_TYPE                              \
+  mluOpHandle_t,                                          \
+  const mluOpPointerMode_t,                               \
+  const void *,                                           \
+  const mluOpTensorDescriptor_t,                          \
+  const void *,                                           \
+  const void *,                                           \
+  const mluOpTensorDescriptor_t,                          \
+  void *
+
 #define STRIDEDSLICE_PARAM_TYPE                           \
   mluOpHandle_t, const mluOpTensorDescriptor_t, const void *, \
     const int *, const int *, const int *,                \
@@ -306,7 +316,6 @@ KERNEL_REGISTER(RoiAlignBackward, ROIALIGNBACKWARD_PARAM_TYPE);
 KERNEL_REGISTER(RoiAlignBackwardV2, ROIALIGNBACKWARD_V2_PARAM_TYPE);
 KERNEL_REGISTER(RoiPoolingForward, ROIPOOLINGFORWARD_PARAM_TYPE);
 KERNEL_REGISTER(RoiPoolingBackward, ROIPOOLINGBACKWARD_PARAM_TYPE);
-KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormStats, SYNCBATCHNORMSTATS_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormStatsV2, SYNCBATCHNORMSTATS_V2_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormGatherStatsWithCounts,
@@ -320,5 +329,6 @@ KERNEL_REGISTER(SyncBatchNormBackwardElemt,
                 SYNCBATCHNORMBACKWARDELEMT_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormBackwardElemtV2,
                 SYNCBATCHNORMBACKWARDELEMT_V2_PARAM_TYPE);
+KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE);
 KERNEL_REGISTER(StridedSlice, STRIDEDSLICE_PARAM_TYPE);
 #endif  // KERNELS_KERNEL_WRAPPER_WRAPPER_H
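Judging from the transformWrapper::invoke() call in transform.cpp below, KERNEL_REGISTER(name, params) presumably declares a per-operator class named `<name>Wrapper` exposing an invoke(params...) method; a minimal sketch under that assumption, not the actual macro definition:

// Sketch only: assumes KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE)
// expands to roughly the following declaration, which transform.cpp below
// then instantiates and calls.
class transformWrapper {
 public:
  mluOpStatus_t invoke(mluOpHandle_t handle,
                       const mluOpPointerMode_t pointer_mode,
                       const void *alpha,
                       const mluOpTensorDescriptor_t input_desc,
                       const void *input,
                       const void *beta,
                       const mluOpTensorDescriptor_t output_desc,
                       void *output);  // forwards to the device kernel
};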
diff --git a/bangc-ops/kernels/transform/transform.cpp b/bangc-ops/kernels/transform/transform.cpp
new file mode 100644
index 000000000..466a6abd3
--- /dev/null
+++ b/bangc-ops/kernels/transform/transform.cpp
@@ -0,0 +1,40 @@
+/*************************************************************************
+ * Copyright (C) [2023] by Cambricon, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include "kernels/kernel_wrapper/wrapper.h"
+
+mluOpStatus_t MLUOP_WIN_API mluOpTransform(
+    mluOpHandle_t handle,
+    const mluOpPointerMode_t pointer_mode,
+    const void *alpha,
+    const mluOpTensorDescriptor_t input_desc,
+    const void *input,
+    const void *beta,
+    const mluOpTensorDescriptor_t output_desc,
+    void *output) {
+  transformWrapper wrapper;
+  mluOpStatus_t ret = wrapper.invoke(
+      handle, pointer_mode, alpha, input_desc, input,
+      beta, output_desc, output);
+  return ret;
+}
diff --git a/bangc-ops/mlu_op.h b/bangc-ops/mlu_op.h
index e6cd5468b..1f94993cf 100644
--- a/bangc-ops/mlu_op.h
+++ b/bangc-ops/mlu_op.h
@@ -15096,6 +15096,101 @@ mluOpSyncBatchNormBackwardElemtV2(mluOpHandle_t handle,
                                   const mluOpTensorDescriptor_t diff_x_desc,
                                   void *diff_x);
 
+// Group:Transform
+/*!
+ * @brief Linearly transforms an input tensor using the following formula:
+ *
+ *   output = alpha * input + beta
+ *
+ * Parameters \b input and \b output represent the input and output tensors,
+ * and \b alpha and \b beta represent scale factors. ::mluOpTransform
+ * accepts \b alpha and \b beta through either host or device pointers.
+ *
+ * @param[in] handle
+ *   Handle to a Cambricon MLUOP context that is used to manage MLU devices
+ *   and queues. For detailed information, refer to ::mluOpHandle_t.
+ * @param[in] pointer_mode
+ *   An enum value that indicates whether the scalar values \b alpha and
+ *   \b beta are passed by host pointer or by device pointer. The mode type
+ *   is defined in ::mluOpPointerMode_t.
+ * @param[in] alpha
+ *   Pointer to the scaling factor applied to the input tensor.
+ *   If \b pointer_mode is \b MLUOP_POINTER_MODE_DEVICE, \b alpha must be
+ *   a device pointer; if \b pointer_mode is \b MLUOP_POINTER_MODE_HOST,
+ *   it must be a host pointer.
+ * @param[in] input_desc
+ *   Descriptor of the input tensor \b input. Refer to
+ *   ::mluOpTensorDescriptor_t for further detail.
+ * @param[in] input
+ *   Device pointer to the MLU memory that stores the input tensor.
+ * @param[in] beta
+ *   Pointer to the offset factor added to the scaled input.
+ *   If \b pointer_mode is \b MLUOP_POINTER_MODE_DEVICE, \b beta must be
+ *   a device pointer; if \b pointer_mode is \b MLUOP_POINTER_MODE_HOST,
+ *   it must be a host pointer.
+ * @param[in] output_desc
+ *   Descriptor of the output tensor \b output. For detailed information,
+ *   refer to ::mluOpTensorDescriptor_t.
+ * @param[out] output
+ *   Device pointer to the MLU memory that stores the output tensor.
+ *
+ * @par Return
+ * - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
+ *
+ * @par Formula
+ * - See the "Transform Operator" section in the "Cambricon MLUOP User
+ *   Guide" for details.
+ *
+ * @par Data Type
+ * - The data types of the input tensor \b input and the output tensor
+ *   \b output must match and must be half, float, or int32.
+ * - \b alpha and \b beta: if the tensor data type is half or float,
+ *   \b alpha and \b beta must be pointers to float; if the tensor data
+ *   type is int32, they must be pointers to int.
+ *
+ * @par Scale Limitation
+ * - The tensor descriptors of the input and output tensors must be
+ *   identical.
+ * - The number of dimensions must not exceed \p MLUOP_DIM_MAX.
+ *
+ * @note
+ * - None.
+ *
+ * @par Requirements
+ * - None.
+ *
+ * @par Example
+   @verbatim
+   Input tensor  : [[1, 2, 3],
+                    [4, 5, 6],
+                    [7, 8, 9]]
+
+   alpha         : 2
+
+   beta          : 1
+
+   Output tensor : [[3, 5, 7],
+                    [9, 11, 13],
+                    [15, 17, 19]]
+   @endverbatim
+ */
+mluOpStatus_t MLUOP_WIN_API
+mluOpTransform(mluOpHandle_t handle,
+               const mluOpPointerMode_t pointer_mode,
+               const void *alpha,
+               const mluOpTensorDescriptor_t input_desc,
+               const void *input,
+               const void *beta,
+               const mluOpTensorDescriptor_t output_desc,
+               void *output);
+
 // Group:StridedSlice
 /*!
  * @brief Extracts a slice of size ``(end - begin) / stride`` from the
  * given \p input tensor.
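A minimal usage sketch of the new API, assuming a valid `handle`, a float tensor descriptor `desc`, and device buffers `d_in`/`d_out` (hypothetical names) have already been created through the existing MLUOP and CNRT APIs:

// Sketch: compute y = 2 * x + 1 on a float tensor, passing the scalars
// from the host. `handle`, `desc`, `d_in`, and `d_out` are assumed to be
// set up elsewhere (mluOpCreate, mluOpCreateTensorDescriptor, cnrtMalloc).
float alpha = 2.0f;
float beta = 1.0f;
mluOpStatus_t status =
    mluOpTransform(handle, MLUOP_POINTER_MODE_HOST, &alpha,
                   desc, d_in, &beta, desc, d_out);
// With MLUOP_POINTER_MODE_DEVICE, &alpha and &beta would instead have to
// point to device memory holding the two scalars.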
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
new file mode 100644
index 000000000..0ec25f836
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
@@ -0,0 +1,49 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1000
+    dims: 2000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1000
+    dims: 2000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+}
+transform_param: {
+  alpha: 1
+  beta: 1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
new file mode 100644
index 000000000..c1366ef7d
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
@@ -0,0 +1,41 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 13
+    dims: 17
+    dims: 100
+    dims: 70
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 13
+    dims: 17
+    dims: 100
+    dims: 70
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+}
+transform_param: {
+  alpha: 1
+  beta: 1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
new file mode 100644
index 000000000..23d388fd6
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_COMPLEX_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_COMPLEX_FLOAT
+}
+transform_param: {
+  alpha: 1.1
+  alpha_imag: 1.1
+  beta: 1.2
+  beta_imag: 1.1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
new file mode 100644
index 000000000..5e55b0b18
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_HALF
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_HALF
+}
+transform_param: {
+  alpha: 1.5
+  beta: 0.8
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
new file mode 100644
index 000000000..ce449f367
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_INT32
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_INT32
+}
+transform_param: {
+  alpha: 1.5
+  beta: 0.8
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
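A note on int32_0.prototxt: the executor below casts alpha and beta to int for int32 tensors, so the fractional factors above are truncated before use; a worked example of the effective arithmetic:

// Effective int32 arithmetic for int32_0.prototxt, given the (int) casts
// in TransformExecutor::compute()/cpuCompute() below:
int alpha_int = (int)1.5f;  // truncates to 1
int beta_int = (int)0.8f;   // truncates to 0
// output[i] = 1 * input[i] + 0, i.e. the int32 case degenerates to a copy.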
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
new file mode 100644
index 000000000..91c916d94
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
@@ -0,0 +1,194 @@
+/*************************************************************************
+ * Copyright (C) [2019-2022] by Cambricon, Inc.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include <cstdlib>
+#include <cstring>
+
+#include "transform.h"
+
+namespace mluoptest {
+
+typedef struct {
+  float real;
+  float imag;
+} ComplexType;
+
+void TransformExecutor::paramCheck() {
+  if (!parser_->getProtoNode()->has_transform_param()) {
+    LOG(ERROR) << "Missing transform param. ";
+  }
+  if (parser_->getInputNum() != 1) {
+    LOG(ERROR) << "transform tensor input number is wrong. ";
+  }
+  if (parser_->getOutputNum() != 1) {
+    LOG(ERROR) << "transform tensor output number is wrong. ";
+  }
+}
+
+void TransformExecutor::compute() {
+  VLOG(4) << "TransformExecutor compute ";
+  if (!parser_->getProtoNode()->has_transform_param()) {
+    LOG(ERROR) << "Missing transform param. ";
+  }
+  auto tensor_a = tensor_desc_[0].tensor;
+  auto tensor_out = tensor_desc_[1].tensor;
+  auto dev_a = data_vector_[0].device_ptr;
+  auto dev_c = data_vector_[1].device_ptr;
+  float alpha = 0.0;
+  float beta = 0.0;
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha = parser_->getProtoNode()->transform_param().alpha();
+    beta = parser_->getProtoNode()->transform_param().beta();
+  }
+  VLOG(4) << "call mluOpTransform()";
+  interface_timer_.start();
+  // MLUOP_GTEST_TRANSFORM_ONCHIP_SCALE_PARAM=ON selects the device pointer
+  // mode; otherwise alpha/beta are passed as host pointers.
+  char *env_temp = getenv("MLUOP_GTEST_TRANSFORM_ONCHIP_SCALE_PARAM");
+  bool device_interface = (env_temp && strcmp(env_temp, "ON") == 0);
+  void *alpha_device = NULL;
+  void *beta_device = NULL;
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha_device = mlu_runtime_.allocate(sizeof(float));
+    beta_device = mlu_runtime_.allocate(sizeof(float));
+  } else {
+    alpha_device = mlu_runtime_.allocate(sizeof(ComplexType));
+    beta_device = mlu_runtime_.allocate(sizeof(ComplexType));
+  }
+  if (tensor_a->dtype == MLUOP_DTYPE_INT32) {
+    int alpha_int = (int)alpha;
+    int beta_int = (int)beta;
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha_int, sizeof(int),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta_int, sizeof(int),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST,
+                                 &alpha_int, tensor_a, dev_a,
+                                 &beta_int, tensor_out, dev_c));
+    }
+  } else if (tensor_a->dtype == MLUOP_DTYPE_COMPLEX_FLOAT) {
+    ComplexType alpha_complex, beta_complex;
+    alpha_complex.real = parser_->getProtoNode()->transform_param().alpha();
+    alpha_complex.imag =
+        parser_->getProtoNode()->transform_param().alpha_imag();
+    beta_complex.real = parser_->getProtoNode()->transform_param().beta();
+    beta_complex.imag =
+        parser_->getProtoNode()->transform_param().beta_imag();
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha_complex,
+                             sizeof(ComplexType),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta_complex,
+                             sizeof(ComplexType),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      // Device pointer mode requires device pointers: pass the buffers
+      // populated above, not the host-side structs.
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST,
+                                 &alpha_complex, tensor_a, dev_a,
+                                 &beta_complex, tensor_out, dev_c));
+    }
+  } else {  // float/half
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha, sizeof(float),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta, sizeof(float),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST, &alpha,
+                                 tensor_a, dev_a, &beta, tensor_out, dev_c));
+    }
+  }
+  mlu_runtime_.deallocate(alpha_device);
+  mlu_runtime_.deallocate(beta_device);
+  interface_timer_.stop();
+}
+
+void TransformExecutor::cpuCompute() {
+  assert(parser_->getInputNum() == 1);
+  assert(parser_->getOutputNum() == 1);
+  float alpha = 0.0;
+  float beta = 0.0;
+
+  auto tensor_a = tensor_desc_[0].tensor;
+  size_t count1 = parser_->getInputDataCount(0);
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha = parser_->getProtoNode()->transform_param().alpha();
+    beta = parser_->getProtoNode()->transform_param().beta();
+  }
+
+  if (tensor_a->dtype == MLUOP_DTYPE_INT32) {
+    for (size_t i = 0; i < count1; ++i) {
+      cpu_fp32_output_[0][i] = (int)alpha * cpu_fp32_input_[0][i] + (int)beta;
+    }
+  } else if (tensor_a->dtype == MLUOP_DTYPE_COMPLEX_FLOAT) {
+    ComplexType alpha_complex, beta_complex;
+    alpha_complex.real = parser_->getProtoNode()->transform_param().alpha();
+    alpha_complex.imag =
+        parser_->getProtoNode()->transform_param().alpha_imag();
+    beta_complex.real = parser_->getProtoNode()->transform_param().beta();
+    beta_complex.imag =
+        parser_->getProtoNode()->transform_param().beta_imag();
+    // Interleaved layout: even index = real part, odd index = imaginary.
+    for (size_t i = 0; i < count1 * 2; i += 2) {
+      float a = cpu_fp32_input_[0][i];
+      float b = cpu_fp32_input_[0][i + 1];
+      float c = alpha_complex.real;
+      float d = alpha_complex.imag;
+      cpu_fp32_output_[0][i] = a * c - b * d + beta_complex.real;
+      cpu_fp32_output_[0][i + 1] = a * d + b * c + beta_complex.imag;
+    }
+  } else {  // float/half
+    for (size_t i = 0; i < count1; ++i) {
+      cpu_fp32_output_[0][i] = alpha * cpu_fp32_input_[0][i] + beta;
+    }
+  }
+}
+
+int64_t TransformExecutor::getTheoryOps() {
+  // One multiply and one add per element.
+  int cp_count = 2;
+  int64_t theory_ops = parser_->getInputDataCount(0) * cp_count;
+  VLOG(4) << "getTheoryOps: " << theory_ops << " ops";
+  return theory_ops;
+}
+
+}  // namespace mluoptest
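For reference, the complex branch of cpuCompute() above is an elementwise complex multiply-add over interleaved (real, imag) pairs. With x_i = a + b*i and alpha = c + d*i, the loop computes

    Re(y_i) = a*c - b*d + Re(beta)
    Im(y_i) = a*d + b*c + Im(beta)

which is exactly y_i = alpha * x_i + beta in complex arithmetic, matching the transform_param values exercised by complex.prototxt.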
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h
new file mode 100644
index 000000000..3cb7a07a7
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h
@@ -0,0 +1,31 @@
+/*************************************************************************
+ * Copyright (C) [2019-2022] by Cambricon, Inc.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#ifndef TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
+#define TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
+#include "executor.h"
+
+namespace mluoptest {
+
+class TransformExecutor : public Executor {
+ public:
+  TransformExecutor() {}
+  ~TransformExecutor() {}
+
+  void paramCheck() override;
+  void compute() override;
+  void cpuCompute() override;
+  int64_t getTheoryOps() override;
+};
+
+}  // namespace mluoptest
+#endif  // TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
diff --git a/docs/bangc-docs/user_guide/9_operators/index.rst b/docs/bangc-docs/user_guide/9_operators/index.rst
index ab979ec71..5eba4b230 100644
--- a/docs/bangc-docs/user_guide/9_operators/index.rst
+++ b/docs/bangc-docs/user_guide/9_operators/index.rst
@@ -951,6 +951,19 @@ mluOpSyncBatchNormBackwardElemt
 ---------------------------------
 This operator computes the gradient of the input and, together with :ref:`sync_batchnorm_backward_reduce`, implements sync_batchnorm_backward.
 
+.. _transform:
+
+mluOpTransform
+---------------------------------
+The Transform operator applies an elementwise linear transformation to the input data:
+
+.. math::
+
+   y_i = \alpha \times x_i + \beta
+
+where ``i`` denotes a multi-dimensional tuple index into the tensor, e.g. (n, c, h, w) for a
+4-D tensor, and ``x_i`` and ``y_i`` denote the input and output elements at index ``i``.
+
 .. _strided_slice:
 
 mluOpStridedSlice
@@ -969,3 +982,4 @@
 
 - output is a tensor with ``N`` dimensions.
 - ``begin``, ``end``, and ``stride`` are arrays of length ``N`` that give, for each dimension, the start index, end index, and stride of the slice.
+