Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix npu op bug. #3122

Open
wants to merge 62 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
7534dd6
fix npu bug.
momo609 May 13, 2024
ef10dc7
modify chamfer
Annarine Jun 19, 2024
46182c6
Add NPU support for dynamic voxelization
Pr0Wh1teGivee Jun 18, 2024
ee79718
Merge pull request #37 from Pr0Wh1teGivee/rc4main
momo609 Jun 19, 2024
b238850
Bugfix of NPU adapter of nms3d
DaGaiBa Jun 19, 2024
14f031a
repair nms_rotated bug
huhongsun Jun 18, 2024
ffde0d5
Merge pull request #31 from huhongsun/rc4main
momo609 Jun 20, 2024
ca0e427
add dtype check for roi_align
mikaelchan Jun 21, 2024
4aacf0f
Merge pull request #39 from mikaelchan/rc4main
momo609 Jun 21, 2024
2ed5eb5
adapt box_iou_rotaed to boxes_overlap_bev
Jun 21, 2024
d11a531
Merge pull request #38 from DaGaiBa/rc4main
momo609 Jun 22, 2024
a10cfa4
Merge pull request #41 from Pr0Wh1teGivee/iou_fix
momo609 Jun 24, 2024
7c56ba9
fix the bug of DeformableRoiPoolGrad
wujiadi1 Jun 25, 2024
e112ca4
Merge pull request #43 from wujiadi1/rc4main
momo609 Jun 25, 2024
f572475
Interfaces change.
DaGaiBa Jul 19, 2024
44afa6a
Merge pull request #51 from DaGaiBa/rc4main
momo609 Jul 19, 2024
6728a04
chamfer push_back
Annarine Jul 22, 2024
5e0bd89
adapt boxes_overlap_bev & box_iou_rotated
OlaWod Jul 24, 2024
a6d4f0d
fix msda
Pr0Wh1teGivee Aug 5, 2024
0c19b5d
Merge pull request #54 from Pr0Wh1teGivee/msda_fix
momo609 Aug 5, 2024
b7702bf
fix focallossgrad
momo609 Aug 7, 2024
d5085da
Merge pull request #53 from OlaWod/rc4main
momo609 Sep 10, 2024
650c321
points_in_boxes_all的mmcv适配
ZrBac Oct 24, 2024
8c45aa9
Merge pull request #60 from ZrBac/box
hust17yixuan Oct 29, 2024
8f5a5b9
change the knn and three nn code
huangyuan64 Oct 29, 2024
87f00a6
Merge pull request #61 from huangyuan64/rc4main
hust17yixuan Oct 29, 2024
993979b
codeclean npu/boxes_overlap_bev_npu.cpp
OlaWod Oct 30, 2024
94d6e92
adapt npu box_iou_quadri
OlaWod Sep 24, 2024
5260330
Merge pull request #62 from OlaWod/rc4main
hust17yixuan Oct 31, 2024
519195c
Merge pull request #63 from OlaWod/quadri
hust17yixuan Oct 31, 2024
ae66b32
add the roi align rotated v2 ops
huangyuan64 Nov 1, 2024
c29c607
Merge pull request #64 from huangyuan64/rc4
hust17yixuan Nov 1, 2024
ffe3a1a
add assign_score_withk NPU adaptation
Hua-yuxiu Nov 4, 2024
42d93d4
update point_to_voxel & voxel_to_point in scatter_points.py
OlaWod Nov 4, 2024
9634af3
Merge pull request #67 from OlaWod/scatter_points
hust17yixuan Nov 4, 2024
1087e50
Merge pull request #66 from Hua-yuxiu/rc4main_my
hust17yixuan Nov 4, 2024
0535385
border_align
frh23333 Nov 4, 2024
6c48402
Update points_in_boxes.py
ZrBac Nov 5, 2024
96c4d7c
主仓同步
Nov 5, 2024
e3cf445
Merge pull request #70 from ZrBac/patch-2
hust17yixuan Nov 6, 2024
3424ec1
add new npu op roiaware_pool3d
Nov 6, 2024
32997dd
Merge pull request #69 from fangruohuan/rc4main
hust17yixuan Nov 6, 2024
0238175
Merge pull request #73 from JYYCaN/rc4main
hust17yixuan Nov 6, 2024
4a21387
pixel_grou
Annarine Nov 6, 2024
7ee5a41
Merge pull request #76 from Annarine/rc4main
hust17yixuan Nov 6, 2024
eedad49
scatter points bug fix
JYYCaN Nov 7, 2024
1c2c238
Merge pull request #77 from JYYCaN/patch-1
hust17yixuan Nov 7, 2024
7fd21ee
update nms_rotated from openmmlab.mmcv main
abdu-uy Nov 23, 2024
2b7def6
Merge pull request #79 from abdu-uy/rc4main
hust17yixuan Nov 23, 2024
ca26c89
roi_align_rotated_v2
ason-rob Nov 26, 2024
f27fff7
Merge pull request #81 from ason-rob/rc4main
hust17yixuan Nov 27, 2024
cf23718
add pixel_group_npu
Bosco-lab Nov 27, 2024
2ed1460
Merge pull request #82 from Bosco-lab/rc4main
hust17yixuan Nov 29, 2024
a752d17
modify internal calls of npu boxes_overlap_bev & box_iou_rotated
OlaWod Dec 16, 2024
b5c97d4
Merge pull request #83 from OlaWod/boxes
hust17yixuan Dec 18, 2024
0aa84d2
git checkout origin pixel_group
Annarine Jan 7, 2025
10d0767
Merge pull request #84 from Annarine/rc4main
hust17yixuan Jan 7, 2025
540ac07
add impl of assign_score_withk backward
Hua-yuxiu Jan 7, 2025
4095faf
Revert "modify internal calls of npu boxes_overlap_bev & box_iou_rota…
hust17yixuan Jan 8, 2025
721408e
Merge pull request #86 from Hua-yuxiu/rc4main
hust17yixuan Jan 13, 2025
0937678
add diou_npu
Jan 13, 2025
d45099a
Merge pull request #87 from JYYCaN/rc4main
hust17yixuan Jan 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/assign_score_withk_npu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

void assign_score_withk_forward_npu(int B, int N0, int N1, int M, int K, int O,
int aggregate, const Tensor& points,
const Tensor& centers,
const Tensor& scores,
const Tensor& knn_idx, Tensor& output) {
at::Tensor points_trans = points.permute({0, 3, 1, 2});
at::Tensor centers_trans = centers.permute({0, 3, 1, 2});

EXEC_NPU_CMD(aclnnAssignScoreWithk, points_trans, centers_trans, scores, knn_idx, B, N0, N1, M, K, O, aggregate, output);
}

// Forward declaration of the device-agnostic dispatch entry; its definition
// is not in this file.
void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
int aggregate, const Tensor& points,
const Tensor& centers,
const Tensor& scores,
const Tensor& knn_idx, Tensor& output);

// Associates the NPU function above with the dispatch entry.
// NOTE(review): exact registration semantics come from REGISTER_NPU_IMPL in
// pytorch_npu_helper.hpp — confirm there.
REGISTER_NPU_IMPL(assign_score_withk_forward_impl, assign_score_withk_forward_npu);


void assign_score_withk_backward_npu(
int B, int N0, int N1, int M, int K, int O, int aggregate,
const Tensor& grad_out, const Tensor& points, const Tensor& centers,
const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
Tensor& grad_centers, Tensor& grad_scores) {

at::Tensor grad_out_trans = grad_out.permute({0, 2, 3, 1});

EXEC_NPU_CMD(aclnnAssignScoreWithkGrad, grad_out_trans, points, centers, scores, knn_idx, B, N0, N1, M, K, O, aggregate, grad_scores, grad_points, grad_centers);
}

// Forward declaration of the device-agnostic backward dispatch entry; its
// definition is not in this file.
void assign_score_withk_backward_impl(
int B, int N0, int N1, int M, int K, int O, int aggregate,
const Tensor& grad_out, const Tensor& points, const Tensor& centers,
const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
Tensor& grad_centers, Tensor& grad_scores);

// Associates the NPU backward function above with the dispatch entry.
// NOTE(review): exact registration semantics come from REGISTER_NPU_IMPL in
// pytorch_npu_helper.hpp — confirm there.
REGISTER_NPU_IMPL(assign_score_withk_backward_impl, assign_score_withk_backward_npu);

53 changes: 53 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/border_align_npu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "pytorch_npu_helper.hpp"

using namespace NPU_NAME_SPACE;
using namespace std;

// Forward declaration of the device-agnostic border_align forward entry; its
// definition is not in this file.
void border_align_forward_impl(const Tensor &input, const Tensor &boxes, Tensor output,
Tensor argmax_idx, const int pool_size);

void border_align_forward_npu(const Tensor &input, const Tensor &boxes, Tensor output,
Tensor argmax_idx, const int pool_size){
TORCH_CHECK(input.size(0) == boxes.size(0), "The batch sizes of feature map and rois must be the same.");
TORCH_CHECK(input.size(1) % 4 == 0, "The number of channels must be divisible by 4.");
TORCH_CHECK(pool_size >= 2, "The pool size should be larger than 2.");
int32_t batch_size = input.size(0);
int32_t channels = input.size(1);
int32_t height = input.size(2);
int32_t width = input.size(3);
at::Tensor feature_map = input.permute({0, 2, 3, 1}).contiguous();
at::Tensor rois_map = boxes.contiguous();
at::Tensor temp_tensor = at::zeros({batch_size, height * width, pool_size + 1, channels}, input.options());
EXEC_NPU_CMD(aclnnBorderAlign, feature_map, rois_map, pool_size, temp_tensor);
auto max_result = temp_tensor.max(-2);
at::Tensor output_ = std::get<0>(max_result).to(at::kFloat);
output_ = output_.reshape({batch_size, height * width, 4, channels / 4}).permute({0, 3, 1, 2}).contiguous();
output.copy_(output_);
at::Tensor argmax_idx_ = std::get<1>(max_result).to(at::kInt);
argmax_idx_ = argmax_idx_.reshape({batch_size, height * width, 4, channels / 4}).permute({0, 3, 1, 2}).contiguous();
argmax_idx.copy_(argmax_idx_);
}
// Associates border_align_forward_npu with the border_align_forward_impl
// dispatch entry. NOTE(review): exact semantics come from REGISTER_NPU_IMPL in
// pytorch_npu_helper.hpp — confirm there.
REGISTER_NPU_IMPL(border_align_forward_impl, border_align_forward_npu);


// Forward declaration of the device-agnostic border_align backward entry; its
// definition is not in this file.
void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
const Tensor &argmax_idx, Tensor grad_input,
const int pool_size);

void border_align_backward_npu(const Tensor &grad_output, const Tensor &boxes,
const Tensor &argmax_idx, Tensor grad_input,
const int pool_size){
TORCH_CHECK(grad_output.dim() == 4, "grad_out.dim() must be 4, but got: ", grad_output.dim());
TORCH_CHECK(boxes.dim() == 3, "idx.dim() must be 3, but got: ", boxes.dim());
TORCH_CHECK(argmax_idx.dim() == 4, "argmax_idx.dim() must be 4, but got: ", argmax_idx.dim());

int32_t batch_size = grad_output.size(0);
int32_t feat_channels = grad_output.size(1) * 4;
int32_t channels = grad_output.size(1);
int32_t box_size = boxes.size(1);
int32_t height = grad_input.size(2);
int32_t width = grad_input.size(3);

EXEC_NPU_CMD(aclnnBorderAlignGrad, grad_output, boxes, argmax_idx, channels, box_size, height, width, pool_size, batch_size, grad_input);
}
// Associates border_align_backward_npu with the border_align_backward_impl
// dispatch entry. NOTE(review): exact semantics come from REGISTER_NPU_IMPL in
// pytorch_npu_helper.hpp — confirm there.
REGISTER_NPU_IMPL(border_align_backward_impl, border_align_backward_npu);
5 changes: 3 additions & 2 deletions mmcv/ops/csrc/pytorch/npu/box_iou_quadri_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ using namespace NPU_NAME_SPACE;
using namespace std;

void box_iou_quadri_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
const int mode_flag, const bool aligned);
const int mode_flag, const bool aligned);

void box_iou_quadri_npu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
const int mode_flag, const bool aligned) {
const int mode_flag, const bool aligned) {

TORCH_CHECK(boxes1.size(1) == 8, "boxes1 must be 2D tensor (N, 8)");
TORCH_CHECK(boxes1.size(1) == 8, "boxes1 must be 2D tensor (N, 8)");

Expand Down
3 changes: 2 additions & 1 deletion mmcv/ops/csrc/pytorch/npu/box_iou_rotated_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,

void box_iou_rotated_npu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
const int mode_flag, const bool aligned) {

TORCH_CHECK(boxes1.size(1) == 5, "boxes1 must be 2D tensor (N, 5)");
TORCH_CHECK(boxes1.size(1) == 5, "boxes1 must be 2D tensor (N, 5)");

auto trans = false;
auto is_clockwise = false;
EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes1, boxes2, trans, is_clockwise,
aligned, mode_flag, ious);
aligned, mode_flag, ious);
return;
}

Expand Down
21 changes: 10 additions & 11 deletions mmcv/ops/csrc/pytorch/npu/boxes_overlap_bev_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,16 @@ void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
void iou3d_boxes_overlap_bev_forward_npu(const int num_a, const Tensor boxes_a,
const int num_b, const Tensor boxes_b,
Tensor ans_overlap) {
TORCH_CHECK(boxes_a.size(1) == 7, "boxes_a must be 2D tensor (N, 7)");
TORCH_CHECK(boxes_b.size(1) == 7, "boxes_b must be 2D tensor (N, 7)");

auto trans = false;
auto is_clockwise = false;
auto aligned = false;
auto mode_flag = 2;
EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes_a, boxes_b, trans, is_clockwise,
aligned, mode_flag, ans_overlap);
return;
TORCH_CHECK(boxes_a.size(1) == 7, "boxes_a must be 2D tensor (N, 7)");
TORCH_CHECK(boxes_b.size(1) == 7, "boxes_b must be 2D tensor (N, 7)");

auto trans = false;
auto is_clockwise = false;
auto aligned = false;
auto mode_flag = 2;
EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes_a, boxes_b, trans, is_clockwise, aligned, mode_flag, ans_overlap);
return;
}

REGISTER_NPU_IMPL(iou3d_boxes_overlap_bev_forward_impl,
iou3d_boxes_overlap_bev_forward_npu);
REGISTER_NPU_IMPL(iou3d_boxes_overlap_bev_forward_impl, iou3d_boxes_overlap_bev_forward_npu);
19 changes: 17 additions & 2 deletions mmcv/ops/csrc/pytorch/npu/chamfer_distance_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,34 @@ using namespace std;

void chamfer_distance_forward_npu(Tensor XYZ1, Tensor XYZ2, Tensor dist1,
Tensor dist2, Tensor idx1, Tensor idx2) {
bool is_half = XYZ1.scalar_type() == at::kHalf;
at::Tensor xyz1 = at::ones_like(XYZ1);
at::Tensor xyz2 = at::ones_like(XYZ2);
at::Tensor distf1 = at::ones_like(dist1);
at::Tensor distf2 = at::ones_like(dist2);
xyz1 = XYZ1.transpose(1, 2).transpose(0, 1);
xyz2 = XYZ2.transpose(1, 2).transpose(0, 1);
if (is_half) {
xyz1 = xyz1.to(at::kFloat);
xyz2 = xyz2.to(at::kFloat);
distf1 = dist1.to(at::kFloat);
distf2 = dist2.to(at::kFloat);
}
OpCommand cmd;
cmd.Name("ChamferDistance")
.Input(xyz1)
.Input(xyz2)
.Output(dist1)
.Output(dist2)
.Output(distf1)
.Output(distf2)
.Output(idx1)
.Output(idx2)
.Run();
if (is_half) {
distf1 = distf1.to(at::kHalf);
distf2 = distf2.to(at::kHalf);
}
dist1.copy_(distf1);
dist2.copy_(distf2);
}

void chamfer_distance_backward_npu(Tensor xyz1, Tensor xyz2, Tensor idx1,
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/deform_roi_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void deform_roi_pool_backward_npu(Tensor grad_output, Tensor input, Tensor rois,
.Output(grad_offset)
.Attr("output_size", output_size)
.Attr("spatial_scale", spatial_scale)
.Attr("sample_ratio", sampling_ratio_)
.Attr("sampling_ratio", sampling_ratio_)
.Attr("gamma", gamma)
.Run();
}
Expand Down
28 changes: 28 additions & 0 deletions mmcv/ops/csrc/pytorch/npu/diff_iou_rotated_npu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
using namespace std;

/**
 * NPU entry for diff_iou_rotated sort-vertices.
 *
 * Checks the ranks of the three inputs, converts `mask` to float, and calls
 * aclnnDiffIouRotatedSortVertices to fill a freshly allocated index tensor of
 * shape (vertices.size(0), vertices.size(1), 9) that shares `num_valid`'s
 * tensor options.
 *
 * @param vertices   4-D tensor.
 * @param mask       3-D tensor; passed to the kernel as float.
 * @param num_valid  2-D tensor; its options determine the result's options.
 * @return The index tensor written by the kernel.
 */
Tensor diff_iou_rotated_sort_vertices_npu(Tensor vertices,
                                          Tensor mask,
                                          Tensor num_valid) {
  TORCH_CHECK(vertices.dim() == 4, "vertices must be a 4D Tensor, but got: ", vertices.dim());
  TORCH_CHECK(mask.dim() == 3, "mask must be a 3D Tensor, but got: ", mask.dim());
  TORCH_CHECK(num_valid.dim() == 2, "num_valid must be a 2D Tensor, but got: ", num_valid.dim());

  // Leading two extents of `vertices` give the leading extents of the result.
  const auto batch = static_cast<uint32_t>(vertices.size(0));
  const auto boxes_per_batch = static_cast<uint32_t>(vertices.size(1));

  at::Tensor mask_as_float = mask.to(at::kFloat);
  at::Tensor sorted_indices = at::empty({batch, boxes_per_batch, 9}, num_valid.options());

  EXEC_NPU_CMD(aclnnDiffIouRotatedSortVertices, vertices, mask_as_float, num_valid, sorted_indices);

  return sorted_indices;
}

// Forward declaration of the device-agnostic sort-vertices dispatch entry; its
// definition is not in this file.
Tensor diff_iou_rotated_sort_vertices_forward_impl(Tensor vertices,
Tensor mask,
Tensor num_valid);

// Associates diff_iou_rotated_sort_vertices_npu with the dispatch entry.
// NOTE(review): exact semantics come from REGISTER_NPU_IMPL in
// pytorch_npu_helper.hpp — confirm there.
REGISTER_NPU_IMPL(diff_iou_rotated_sort_vertices_forward_impl,
diff_iou_rotated_sort_vertices_npu);
Loading