Cambricon · PetrelYy · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/bangc-ops/kernel_depends.toml b/bangc-ops/kernel_depends.toml
@@ -12,17 +12,17 @@ div = ["binary_op"]
 expand = ["copy"]
 fill = ["tensor_stride_process"]
 log = ["unary_op"]
-psroipool = ["fill_zero"]
+psroipool = ["fill"]
 roi_align_rotated = ["fill"]
-roi_crop = ["fill_zero"]
+roi_crop = ["fill"]
 rotated_feature_align = ["fill"]
 sqrt = ["binary_op", "unary_op"]
 tensor_stride_process = ["copy"]
 moe_dispatch_backward_data = ["fill"]
 roiaware_pool3d = ["fill","transpose"]
 voxelization = ["fill"]
 get_indice_pairs = ["fill", "scatter_nd", " gather_nd", "reduce", "unique"]
-yolo_box = ["fill_zero"]
+yolo_box = ["fill"]
 deform_roi_pool = ["fill"]
 moe_dispatch_backward_gate = ["fill"]
 indice_convolution_backward_filter = ["fill", "transpose", "gather_nd", "matmul"]

diff --git a/bangc-ops/kernels/fill_zero/fill_zero.h b/bangc-ops/kernels/fill_zero/fill_zero.h
diff --git a/bangc-ops/kernels/fill_zero/fill_zero.mlu b/bangc-ops/kernels/fill_zero/fill_zero.mlu
diff --git a/bangc-ops/kernels/psroipool/psroipool.cpp b/bangc-ops/kernels/psroipool/psroipool.cpp
@@ -24,12 +24,12 @@
 
 #include <string>
 
+#include "core/context.h"
 #include "core/gen_case.h"
 #include "core/logging.h"
 #include "core/runtime/device.h"
 #include "core/tensor.h"
 #include "core/type.h"
-#include "kernels/fill_zero/fill_zero.h"
 #include "kernels/kernel.h"
 
 // policy function
@@ -328,10 +328,9 @@ mluOpStatus_t MLUOP_WIN_API mluOpPsRoiPoolBackward(
           << ", " << k_dim.z << "].";
 
   // gdram set zero
-  int gdramset_size = channels * width * height * batch_size * sizeof(float);
-  KERNEL_CHECK((KernelFillZero(k_dim, k_type, handle->queue, gdramset_size,
-                               bottom_grad)));
-  VLOG(5) << "Kernel KernelFillZero.";
+  float fill_value = 0;
+  MLUOP_CHECK(mluOpFill_v3(handle, MLUOP_POINTER_MODE_HOST, &fill_value,
+                           bottom_grad_desc, bottom_grad));
 
   KERNEL_CHECK((KernelPsRoiPoolBackward(
       k_dim, k_type, handle->queue, top_grad, mapping_channel, rois,

diff --git a/bangc-ops/kernels/roi_crop/roi_crop.cpp b/bangc-ops/kernels/roi_crop/roi_crop.cpp
@@ -30,7 +30,6 @@
 #include "core/runtime/device.h"
 #include "core/tensor.h"
 #include "core/type.h"
-#include "kernels/fill_zero/fill_zero.h"
 
 static void policyFunc(const mluOpHandle_t handle, int bin_num,
                        cnrtDim3_t *k_dim, cnrtFunctionType_t *k_type) {
@@ -251,10 +250,9 @@ mluOpStatus_t MLUOP_WIN_API mluOpRoiCropBackward(
   VLOG(5) << "[mluOpRoiCropBackward] launch kernel policyFunc[" << k_dim.x
           << ", " << k_dim.y << ", " << k_dim.z << "].";
   // gdram set zero
-  int gd_num = channels * width * height * batch * sizeof(float);
-  CHECK_RETURN("[FillZero]", (KernelFillZero(k_dim, k_type, handle->queue,
-                                             gd_num, grad_input)));
-  VLOG(5) << "Kernel KernelFillZero.";
+  float fill_value = 0;
+  MLUOP_CHECK(mluOpFill_v3(handle, MLUOP_POINTER_MODE_HOST, &fill_value,
+                           grad_input_desc, grad_input));
 
   CHECK_RETURN("[mluOpRoiCropBackward]",
                KernelRoiCropBackward(k_dim, k_type, handle->queue, grad_output,

diff --git a/bangc-ops/kernels/voxelization/voxelization.cpp b/bangc-ops/kernels/voxelization/voxelization.cpp
@@ -30,7 +30,6 @@
 #include "core/runtime/device.h"
 #include "core/tensor.h"
 #include "core/type.h"
-#include "kernels/fill_zero/fill_zero.h"
 
 static void policyFuncDefault(const mluOpHandle_t handle,
                               const size_t num_points, cnrtDim3_t *k_dim,

diff --git a/bangc-ops/kernels/yolo_box/yolo_box.cpp b/bangc-ops/kernels/yolo_box/yolo_box.cpp
@@ -30,7 +30,6 @@
 #include "core/runtime/device.h"
 #include "core/tensor.h"
 #include "core/type.h"
-#include "kernels/fill_zero/fill_zero.h"
 
 #define MAX_CLASS_NUM_ARCH_200 1534
 #define MAX_CLASS_NUM_ARCH_300 2558
@@ -188,15 +187,13 @@ mluOpStatus_t MLUOP_WIN_API mluOpYoloBox(
   VLOG(5) << "[mluOpYoloBox] launch kernel policyFunc[" << k_dim.x << ", "
           << k_dim.y << ", " << k_dim.z << "].";
 
-  const int boxes_size = n_in * anchor_s * 4 * h_in * w_in * sizeof(float);
-  CHECK_RETURN("[FillZero]", (KernelFillZero(k_dim, k_type, handle->queue,
-                                             boxes_size, boxes)));
-
-  const int scores_size =
-      n_in * anchor_s * class_num * h_in * w_in * sizeof(float);
-  CHECK_RETURN("[FillZero]", (KernelFillZero(k_dim, k_type, handle->queue,
-                                             scores_size, scores)));
-
+  float fill_value = 0;
+  MLUOP_CHECK(mluOpFill_v3(handle, MLUOP_POINTER_MODE_HOST, &fill_value,
+                           boxes_desc, boxes));
+
+  MLUOP_CHECK(mluOpFill_v3(handle, MLUOP_POINTER_MODE_HOST, &fill_value,
+                           scores_desc, scores));
+
   CHECK_RETURN("[mluOpYoloBox]",
                KernelYoloBox(k_dim, k_type, handle->queue, x, img_size, anchors,
                              class_num, conf_thresh, downsample_ratio,

diff --git a/docs/bangc-docs/BANGC-OPS-OpList.md b/docs/bangc-docs/BANGC-OPS-OpList.md
@@ -49,7 +49,6 @@ MLU Binary Op算子结构：
 | dynamic_point_to_voxel_forward         | √             |               |
 | expand                                 |               | √             |
 | fill                                   |               | √             |
-| fill_zero                              | √             |               |
 | focal_loss_sigmoid_backward            | √             |               |
 | focal_loss_sigmoid_forward             | √             |               |
 | gather_nd                              |               | √             |