From 4d2653e8390adcbc6fe56e004f5228b77bf8311a Mon Sep 17 00:00:00 2001
From: Tingyu <49021685+TingyuZhangSie@users.noreply.github.com>
Date: Mon, 16 Oct 2023 10:41:11 +0800
Subject: [PATCH] [Feature](bangc-ops): add transform binary operator. (#839)

Co-authored-by: zhangtingyu
---
 bangc-ops/kernels/kernel_wrapper/wrapper.h    |  12 +-
 bangc-ops/kernels/transform/transform.cpp     |  40 ++++
 bangc-ops/mlu_op.h                            |  95 +++++++++
 .../zoo/transform/test_case/case_0.prototxt   |  49 +++++
 .../zoo/transform/test_case/case_1.prototxt   |  41 ++++
 .../zoo/transform/test_case/complex.prototxt  |  37 ++++
 .../zoo/transform/test_case/half_0.prototxt   |  37 ++++
 .../zoo/transform/test_case/int32_0.prototxt  |  37 ++++
 .../pb_gtest/src/zoo/transform/transform.cpp  | 194 ++++++++++++++++++
 .../pb_gtest/src/zoo/transform/transform.h    |  31 +++
 .../user_guide/9_operators/index.rst          |  14 ++
 11 files changed, 586 insertions(+), 1 deletion(-)
 create mode 100644 bangc-ops/kernels/transform/transform.cpp
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
 create mode 100644 bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h

diff --git a/bangc-ops/kernels/kernel_wrapper/wrapper.h b/bangc-ops/kernels/kernel_wrapper/wrapper.h
index 30063c935..10eea30cb 100644
--- a/bangc-ops/kernels/kernel_wrapper/wrapper.h
+++ b/bangc-ops/kernels/kernel_wrapper/wrapper.h
@@ -276,6 +276,16 @@
     const mluOpTensorDescriptor_t, const void *,          \
     const mluOpTensorDescriptor_t, void *diff_x
 
+#define TRANSFORM_PARAM_TYPE                              \
+  mluOpHandle_t,                                          \
+  const mluOpPointerMode_t,                               \
+  const void *,                                           \
+  const mluOpTensorDescriptor_t,                          \
+  const void *,                                           \
+  const void *,                                           \
+  const mluOpTensorDescriptor_t,                          \
+  void *
+
 #define STRIDEDSLICE_PARAM_TYPE                           \
   mluOpHandle_t, const mluOpTensorDescriptor_t, const void *, \
     const int *, const int *, const int *,                \
@@ -306,7 +316,6 @@ KERNEL_REGISTER(RoiAlignBackward, ROIALIGNBACKWARD_PARAM_TYPE);
 KERNEL_REGISTER(RoiAlignBackwardV2, ROIALIGNBACKWARD_V2_PARAM_TYPE);
 KERNEL_REGISTER(RoiPoolingForward, ROIPOOLINGFORWARD_PARAM_TYPE);
 KERNEL_REGISTER(RoiPoolingBackward, ROIPOOLINGBACKWARD_PARAM_TYPE);
-KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormStats, SYNCBATCHNORMSTATS_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormStatsV2, SYNCBATCHNORMSTATS_V2_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormGatherStatsWithCounts,
@@ -320,5 +329,6 @@ KERNEL_REGISTER(SyncBatchNormBackwardElemt,
                 SYNCBATCHNORMBACKWARDELEMT_PARAM_TYPE);
 KERNEL_REGISTER(SyncBatchNormBackwardElemtV2,
                 SYNCBATCHNORMBACKWARDELEMT_V2_PARAM_TYPE);
+KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE);
 KERNEL_REGISTER(StridedSlice, STRIDEDSLICE_PARAM_TYPE);
 #endif  // KERNELS_KERNEL_WRAPPER_WRAPPER_H
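Judging from the transformWrapper::invoke() call in transform.cpp below, KERNEL_REGISTER(name, params) presumably declares a per-operator class named `<name>Wrapper` exposing an invoke(params...) method; a minimal sketch under that assumption, not the actual macro definition:

// Sketch only: assumes KERNEL_REGISTER(transform, TRANSFORM_PARAM_TYPE)
// expands to roughly the following declaration, which transform.cpp below
// then instantiates and calls.
class transformWrapper {
 public:
  mluOpStatus_t invoke(mluOpHandle_t handle,
                       const mluOpPointerMode_t pointer_mode,
                       const void *alpha,
                       const mluOpTensorDescriptor_t input_desc,
                       const void *input,
                       const void *beta,
                       const mluOpTensorDescriptor_t output_desc,
                       void *output);  // forwards to the device kernel
};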
diff --git a/bangc-ops/kernels/transform/transform.cpp b/bangc-ops/kernels/transform/transform.cpp
new file mode 100644
index 000000000..466a6abd3
--- /dev/null
+++ b/bangc-ops/kernels/transform/transform.cpp
@@ -0,0 +1,40 @@
+/*************************************************************************
+ * Copyright (C) [2023] by Cambricon, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include "kernels/kernel_wrapper/wrapper.h"
+
+mluOpStatus_t MLUOP_WIN_API mluOpTransform(
+    mluOpHandle_t handle,
+    const mluOpPointerMode_t pointer_mode,
+    const void *alpha,
+    const mluOpTensorDescriptor_t input_desc,
+    const void *input,
+    const void *beta,
+    const mluOpTensorDescriptor_t output_desc,
+    void *output) {
+  transformWrapper wrapper;
+  mluOpStatus_t ret = wrapper.invoke(
+      handle, pointer_mode, alpha, input_desc, input,
+      beta, output_desc, output);
+  return ret;
+}
diff --git a/bangc-ops/mlu_op.h b/bangc-ops/mlu_op.h
index e6cd5468b..1f94993cf 100644
--- a/bangc-ops/mlu_op.h
+++ b/bangc-ops/mlu_op.h
@@ -15096,6 +15096,101 @@ mluOpSyncBatchNormBackwardElemtV2(mluOpHandle_t handle,
                                   const mluOpTensorDescriptor_t diff_x_desc,
                                   void *diff_x);
 
+// Group:Transform
+/*!
+ * @brief Linearly transforms an input tensor using the following formula:
+ *
+ *   output = alpha * input + beta
+ *
+ * Parameters \b input and \b output represent the input and output tensors,
+ * and \b alpha and \b beta represent scale factors. ::mluOpTransform
+ * accepts \b alpha and \b beta through either host or device pointers.
+ *
+ * @param[in] handle
+ *   Handle to a Cambricon MLUOP context that is used to manage MLU devices
+ *   and queues. For detailed information, refer to ::mluOpHandle_t.
+ * @param[in] pointer_mode
+ *   An enum value that indicates whether the scalar values \b alpha and
+ *   \b beta are passed by host pointer or by device pointer. The mode type
+ *   is defined in ::mluOpPointerMode_t.
+ * @param[in] alpha
+ *   Pointer to the scaling factor applied to the input tensor.
+ *   If \b pointer_mode is \b MLUOP_POINTER_MODE_DEVICE, \b alpha must be
+ *   a device pointer; if \b pointer_mode is \b MLUOP_POINTER_MODE_HOST,
+ *   it must be a host pointer.
+ * @param[in] input_desc
+ *   Descriptor of the input tensor \b input. Refer to
+ *   ::mluOpTensorDescriptor_t for further detail.
+ * @param[in] input
+ *   Device pointer to the MLU memory that stores the input tensor.
+ * @param[in] beta
+ *   Pointer to the offset factor added to the scaled input.
+ *   If \b pointer_mode is \b MLUOP_POINTER_MODE_DEVICE, \b beta must be
+ *   a device pointer; if \b pointer_mode is \b MLUOP_POINTER_MODE_HOST,
+ *   it must be a host pointer.
+ * @param[in] output_desc
+ *   Descriptor of the output tensor \b output. For detailed information,
+ *   refer to ::mluOpTensorDescriptor_t.
+ * @param[out] output
+ *   Device pointer to the MLU memory that stores the output tensor.
+ *
+ * @par Return
+ * - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
+ *
+ * @par Formula
+ * - See the "Transform Operator" section in the "Cambricon MLUOP User
+ *   Guide" for details.
+ *
+ * @par Data Type
+ * - The data types of the input tensor \b input and the output tensor
+ *   \b output must match and must be half, float, or int32.
+ * - \b alpha and \b beta: if the tensor data type is half or float,
+ *   \b alpha and \b beta must be pointers to float; if the tensor data
+ *   type is int32, they must be pointers to int.
+ *
+ * @par Scale Limitation
+ * - The tensor descriptors of the input and output tensors must be
+ *   identical.
+ * - The number of dimensions must not exceed \p MLUOP_DIM_MAX.
+ *
+ * @note
+ * - None.
+ *
+ * @par Requirements
+ * - None.
+ *
+ * @par Example
+   @verbatim
+   Input tensor  : [[1, 2, 3],
+                    [4, 5, 6],
+                    [7, 8, 9]]
+
+   alpha         : 2
+
+   beta          : 1
+
+   Output tensor : [[3, 5, 7],
+                    [9, 11, 13],
+                    [15, 17, 19]]
+   @endverbatim
+ */
+mluOpStatus_t MLUOP_WIN_API
+mluOpTransform(mluOpHandle_t handle,
+               const mluOpPointerMode_t pointer_mode,
+               const void *alpha,
+               const mluOpTensorDescriptor_t input_desc,
+               const void *input,
+               const void *beta,
+               const mluOpTensorDescriptor_t output_desc,
+               void *output);
+
 // Group:StridedSlice
 /*!
  * @brief Extracts a slice of size ``(end - begin) / stride`` from the
  * given \p input tensor.
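A minimal usage sketch of the new API, assuming a valid `handle`, a float tensor descriptor `desc`, and device buffers `d_in`/`d_out` (hypothetical names) have already been created through the existing MLUOP and CNRT APIs:

// Sketch: compute y = 2 * x + 1 on a float tensor, passing the scalars
// from the host. `handle`, `desc`, `d_in`, and `d_out` are assumed to be
// set up elsewhere (mluOpCreate, mluOpCreateTensorDescriptor, cnrtMalloc).
float alpha = 2.0f;
float beta = 1.0f;
mluOpStatus_t status =
    mluOpTransform(handle, MLUOP_POINTER_MODE_HOST, &alpha,
                   desc, d_in, &beta, desc, d_out);
// With MLUOP_POINTER_MODE_DEVICE, &alpha and &beta would instead have to
// point to device memory holding the two scalars.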
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
new file mode 100644
index 000000000..0ec25f836
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_0.prototxt
@@ -0,0 +1,49 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1000
+    dims: 2000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1
+    dims: 1000
+    dims: 2000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+}
+transform_param: {
+  alpha: 1
+  beta: 1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
new file mode 100644
index 000000000..c1366ef7d
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/case_1.prototxt
@@ -0,0 +1,41 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 13
+    dims: 17
+    dims: 100
+    dims: 70
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 13
+    dims: 17
+    dims: 100
+    dims: 70
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_FLOAT
+}
+transform_param: {
+  alpha: 1
+  beta: 1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
new file mode 100644
index 000000000..23d388fd6
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/complex.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_COMPLEX_FLOAT
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1000
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_COMPLEX_FLOAT
+}
+transform_param: {
+  alpha: 1.1
+  alpha_imag: 1.1
+  beta: 1.2
+  beta_imag: 1.1
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
new file mode 100644
index 000000000..5e55b0b18
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/half_0.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_HALF
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_HALF
+}
+transform_param: {
+  alpha: 1.5
+  beta: 0.8
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
new file mode 100644
index 000000000..ce449f367
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/test_case/int32_0.prototxt
@@ -0,0 +1,37 @@
+op_name: "transform"
+op_type: "TRANSFORM"
+input {
+  id: "input1"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_INT32
+  random_data: {
+    seed: 23
+    upper_bound: 100
+    lower_bound: 0
+    distribution: UNIFORM
+  }
+}
+output {
+  id: "output"
+  shape: {
+    dims: 1078
+    dims: 201
+  }
+  layout: LAYOUT_ARRAY
+  dtype: DTYPE_INT32
+}
+transform_param: {
+  alpha: 1.5
+  beta: 0.8
+}
+test_param: {
+  error_func: DIFF1
+  error_func: DIFF2
+  error_threshold: 0.003
+  error_threshold: 0.003
+  baseline_device: CPU
+}
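A note on int32_0.prototxt: the executor below casts alpha and beta to int for int32 tensors, so the fractional factors above are truncated before use; a worked example of the effective arithmetic:

// Effective int32 arithmetic for int32_0.prototxt, given the (int) casts
// in TransformExecutor::compute()/cpuCompute() below:
int alpha_int = (int)1.5f;  // truncates to 1
int beta_int = (int)0.8f;   // truncates to 0
// output[i] = 1 * input[i] + 0, i.e. the int32 case degenerates to a copy.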
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
new file mode 100644
index 000000000..91c916d94
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.cpp
@@ -0,0 +1,194 @@
+/*************************************************************************
+ * Copyright (C) [2019-2022] by Cambricon, Inc.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include <cstdlib>
+#include <cstring>
+
+#include "transform.h"
+
+namespace mluoptest {
+
+typedef struct {
+  float real;
+  float imag;
+} ComplexType;
+
+void TransformExecutor::paramCheck() {
+  if (!parser_->getProtoNode()->has_transform_param()) {
+    LOG(ERROR) << "Missing transform param. ";
+  }
+  if (parser_->getInputNum() != 1) {
+    LOG(ERROR) << "transform tensor input number is wrong. ";
+  }
+  if (parser_->getOutputNum() != 1) {
+    LOG(ERROR) << "transform tensor output number is wrong. ";
+  }
+}
+
+void TransformExecutor::compute() {
+  VLOG(4) << "TransformExecutor compute ";
+  if (!parser_->getProtoNode()->has_transform_param()) {
+    LOG(ERROR) << "Missing transform param. ";
+  }
+  auto tensor_a = tensor_desc_[0].tensor;
+  auto tensor_out = tensor_desc_[1].tensor;
+  auto dev_a = data_vector_[0].device_ptr;
+  auto dev_c = data_vector_[1].device_ptr;
+  float alpha = 0.0;
+  float beta = 0.0;
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha = parser_->getProtoNode()->transform_param().alpha();
+    beta = parser_->getProtoNode()->transform_param().beta();
+  }
+  VLOG(4) << "call mluOpTransform()";
+  interface_timer_.start();
+  // MLUOP_GTEST_TRANSFORM_ONCHIP_SCALE_PARAM=ON selects the device pointer
+  // mode; otherwise alpha/beta are passed as host pointers.
+  char *env_temp = getenv("MLUOP_GTEST_TRANSFORM_ONCHIP_SCALE_PARAM");
+  bool device_interface = (env_temp && strcmp(env_temp, "ON") == 0);
+  void *alpha_device = NULL;
+  void *beta_device = NULL;
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha_device = mlu_runtime_.allocate(sizeof(float));
+    beta_device = mlu_runtime_.allocate(sizeof(float));
+  } else {
+    alpha_device = mlu_runtime_.allocate(sizeof(ComplexType));
+    beta_device = mlu_runtime_.allocate(sizeof(ComplexType));
+  }
+  if (tensor_a->dtype == MLUOP_DTYPE_INT32) {
+    int alpha_int = (int)alpha;
+    int beta_int = (int)beta;
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha_int, sizeof(int),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta_int, sizeof(int),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST,
+                                 &alpha_int, tensor_a, dev_a,
+                                 &beta_int, tensor_out, dev_c));
+    }
+  } else if (tensor_a->dtype == MLUOP_DTYPE_COMPLEX_FLOAT) {
+    ComplexType alpha_complex, beta_complex;
+    alpha_complex.real = parser_->getProtoNode()->transform_param().alpha();
+    alpha_complex.imag =
+        parser_->getProtoNode()->transform_param().alpha_imag();
+    beta_complex.real = parser_->getProtoNode()->transform_param().beta();
+    beta_complex.imag =
+        parser_->getProtoNode()->transform_param().beta_imag();
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha_complex,
+                             sizeof(ComplexType),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta_complex,
+                             sizeof(ComplexType),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      // Device pointer mode requires device pointers: pass the buffers
+      // populated above, not the host-side structs.
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST,
+                                 &alpha_complex, tensor_a, dev_a,
+                                 &beta_complex, tensor_out, dev_c));
+    }
+  } else {  // float/half
+    if (device_interface) {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest onchip scale param";
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(alpha_device, &alpha, sizeof(float),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      GTEST_CHECK(CNRT_RET_SUCCESS ==
+                  cnrtMemcpy(beta_device, &beta, sizeof(float),
+                             CNRT_MEM_TRANS_DIR_HOST2DEV));
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_DEVICE,
+                                 alpha_device, tensor_a, dev_a,
+                                 beta_device, tensor_out, dev_c));
+    } else {
+      VLOG(6) << "[gtest]->"
+              << "[mluOpTransform]"
+              << ": enable gtest host scale param";
+      MLUOP_CHECK(mluOpTransform(handle_, MLUOP_POINTER_MODE_HOST, &alpha,
+                                 tensor_a, dev_a, &beta, tensor_out, dev_c));
+    }
+  }
+  mlu_runtime_.deallocate(alpha_device);
+  mlu_runtime_.deallocate(beta_device);
+  interface_timer_.stop();
+}
+
+void TransformExecutor::cpuCompute() {
+  assert(parser_->getInputNum() == 1);
+  assert(parser_->getOutputNum() == 1);
+  float alpha = 0.0;
+  float beta = 0.0;
+
+  auto tensor_a = tensor_desc_[0].tensor;
+  size_t count1 = parser_->getInputDataCount(0);
+  if (tensor_a->dtype != MLUOP_DTYPE_COMPLEX_FLOAT) {
+    alpha = parser_->getProtoNode()->transform_param().alpha();
+    beta = parser_->getProtoNode()->transform_param().beta();
+  }
+
+  if (tensor_a->dtype == MLUOP_DTYPE_INT32) {
+    for (size_t i = 0; i < count1; ++i) {
+      cpu_fp32_output_[0][i] = (int)alpha * cpu_fp32_input_[0][i] + (int)beta;
+    }
+  } else if (tensor_a->dtype == MLUOP_DTYPE_COMPLEX_FLOAT) {
+    ComplexType alpha_complex, beta_complex;
+    alpha_complex.real = parser_->getProtoNode()->transform_param().alpha();
+    alpha_complex.imag =
+        parser_->getProtoNode()->transform_param().alpha_imag();
+    beta_complex.real = parser_->getProtoNode()->transform_param().beta();
+    beta_complex.imag =
+        parser_->getProtoNode()->transform_param().beta_imag();
+    // Interleaved layout: even index = real part, odd index = imaginary.
+    for (size_t i = 0; i < count1 * 2; i += 2) {
+      float a = cpu_fp32_input_[0][i];
+      float b = cpu_fp32_input_[0][i + 1];
+      float c = alpha_complex.real;
+      float d = alpha_complex.imag;
+      cpu_fp32_output_[0][i] = a * c - b * d + beta_complex.real;
+      cpu_fp32_output_[0][i + 1] = a * d + b * c + beta_complex.imag;
+    }
+  } else {  // float/half
+    for (size_t i = 0; i < count1; ++i) {
+      cpu_fp32_output_[0][i] = alpha * cpu_fp32_input_[0][i] + beta;
+    }
+  }
+}
+
+int64_t TransformExecutor::getTheoryOps() {
+  // One multiply and one add per element.
+  int cp_count = 2;
+  int64_t theory_ops = parser_->getInputDataCount(0) * cp_count;
+  VLOG(4) << "getTheoryOps: " << theory_ops << " ops";
+  return theory_ops;
+}
+
+}  // namespace mluoptest
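For reference, the complex branch of cpuCompute() above is an elementwise complex multiply-add over interleaved (real, imag) pairs. With x_i = a + b*i and alpha = c + d*i, the loop computes

    Re(y_i) = a*c - b*d + Re(beta)
    Im(y_i) = a*d + b*c + Im(beta)

which is exactly y_i = alpha * x_i + beta in complex arithmetic, matching the transform_param values exercised by complex.prototxt.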
diff --git a/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h
new file mode 100644
index 000000000..3cb7a07a7
--- /dev/null
+++ b/bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/transform/transform.h
@@ -0,0 +1,31 @@
+/*************************************************************************
+ * Copyright (C) [2019-2022] by Cambricon, Inc.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#ifndef TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
+#define TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
+#include "executor.h"
+
+namespace mluoptest {
+
+class TransformExecutor : public Executor {
+ public:
+  TransformExecutor() {}
+  ~TransformExecutor() {}
+
+  void paramCheck() override;
+  void compute() override;
+  void cpuCompute() override;
+  int64_t getTheoryOps() override;
+};
+
+}  // namespace mluoptest
+#endif  // TEST_MLUOP_GTEST_SRC_ZOO_TRANSFORM_TRANSFORM_H_
diff --git a/docs/bangc-docs/user_guide/9_operators/index.rst b/docs/bangc-docs/user_guide/9_operators/index.rst
index ab979ec71..5eba4b230 100644
--- a/docs/bangc-docs/user_guide/9_operators/index.rst
+++ b/docs/bangc-docs/user_guide/9_operators/index.rst
@@ -951,6 +951,19 @@ mluOpSyncBatchNormBackwardElemt
 ---------------------------------
 This operator computes the gradient of the input and, together with :ref:`sync_batchnorm_backward_reduce`, implements sync_batchnorm_backward.
 
+.. _transform:
+
+mluOpTransform
+---------------------------------
+The Transform operator applies an elementwise linear transformation to the input data:
+
+.. math::
+
+   y_i = \alpha \times x_i + \beta
+
+where ``i`` denotes a multi-dimensional tuple index into the tensor, e.g. (n, c, h, w) for a
+4-D tensor, and ``x_i`` and ``y_i`` denote the input and output elements at index ``i``.
+
 .. _strided_slice:
 
 mluOpStridedSlice
@@ -969,3 +982,4 @@
 
 - output is a tensor with ``N`` dimensions.
 - ``begin``, ``end``, and ``stride`` are arrays of length ``N`` that give, for each dimension, the start index, end index, and stride of the slice.
+