Skip to content

Commit

Permalink
[Feature](bangc_ops): fix r0.4 fill copy expand ops and update ci yaml. (#475)
Browse files Browse the repository at this point in the history

Co-authored-by: ZhangLearning <[email protected]>
  • Loading branch information
ZhangLearning and ZhangLearning authored Mar 8, 2023
1 parent 9cf9959 commit c2295c8
Show file tree
Hide file tree
Showing 21 changed files with 221 additions and 274 deletions.
Original file line number Diff line number Diff line change
@@ -1,62 +1,71 @@
name: bangcops_release_test
name: bangc_all_system_test

on:
push:
branches: [master, r*]
paths:
- 'bangc-ops/kernels/kernel_wrapper/**'
- 'bangc-ops/CMakeLists.txt'
- 'bangc-ops/independent_build.sh'
tags:
- v*
pull_request:
paths:
- '.github/workflows/bangc_all_system_ci.yaml'

jobs:
test:
strategy:
matrix:
runner: [mlu270-x5k, mlu290-m5, mlu370-m8]
os: [ubuntu18.04, ubuntu20.04, debian9, debian10, centos7, centos8]
mlu_ops_version : [v0.4.2]
cntoolkit_version : [cntoolkit3.0.2]
os: [ubuntu18.04, ubuntu20.04, debian10, centos7, centos8]
runs-on: ${{matrix.runner}}
steps:
- uses: actions/checkout@v3

- name: pull_images
run: |
docker pull docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
docker pull docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
- name: build_bangc_ops
run: >
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
./build.sh --sub_module=bangc
- name: mlu_ops_version_check
run: >
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
bash version_check.sh 0.4.1
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
bash version_check.sh 0.4.2
- name: bangc_ops_release_test_cases
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_test/default_platform
- name: bangc_ops_release_temp_cases
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/default_platform
- name: bangc_ops_release_test_370_cases
if: matrix.runner == 'mlu370-m8'
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_test/370
- name: bangc_ops_release_temp_370_cases
if: matrix.runner == 'mlu370-m8'
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-${{matrix.os}}
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-${{matrix.os}}-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/370
- name: clean
run: |
rm -rf bangc-ops/build
42 changes: 38 additions & 4 deletions .github/workflows/ci.yaml → .github/workflows/bangc_ci.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,50 @@
name: ci
name: bangc_ci

on:
push:
branches: [master, r*]
paths-ignore:
- 'docs/**'
- 'bangpy-ops/**'
- 'docker/**'
- 'samples/**'
- 'installer/**'
- '.github/ISSUE_TEMPLATE/**'
- '.github/pull_request_template.md'
- 'CONTRIBUTION.md'
- 'CPPLINT.cfg'
- 'LICENSE'
- 'README.md'
- 'bangc-ops/README.md'
- 'requirements.txt'
- '.github/workflows/bangc_all_system_ci.yaml'
- '.github/workflows/daily.yaml'
pull_request:
branches: [master, r*]
paths-ignore:
- 'docs/**'
- 'bangpy-ops/**'
- 'docker/**'
- 'samples/**'
- 'installer/**'
- '.github/ISSUE_TEMPLATE/**'
- '.github/pull_request_template.md'
- 'CONTRIBUTION.md'
- 'CPPLINT.cfg'
- 'LICENSE'
- 'README.md'
- 'bangc-ops/README.md'
- 'requirements.txt'
- '.github/workflows/bangc_all_system_ci.yaml'
- '.github/workflows/daily.yaml'

jobs:
test:
strategy:
matrix:
runner: [mlu270-x5k, mlu290-m5, mlu370-m8]
mlu_ops_version : [v0.4.2]
cntoolkit_version : [cntoolkit3.0.2]
runs-on: ${{matrix.runner}}
steps:
- uses: actions/checkout@v3
Expand All @@ -21,20 +55,20 @@ jobs:
- name: build_bangc_ops
run: >
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./build.sh --sub_module=bangc
- name: bangc_ops_release_temp_cases
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/default_platform
- name: test_bangc_ops_release_temp_370_cases
if: matrix.runner == 'mlu370-m8'
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/370
- name: clean
Expand Down
15 changes: 10 additions & 5 deletions .github/workflows/daily.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@ name: daily
on:
schedule:
- cron: '0 15 * * *'
pull_request:
paths:
- '.github/workflows/daily.yaml'

jobs:
test:
strategy:
matrix:
runner: [mlu270-x5k, mlu290-m5, mlu370-m8]
mlu_ops_version : [v0.4.2]
cntoolkit_version : [cntoolkit3.0.2]
runs-on: ${{matrix.runner}}
steps:
- uses: actions/checkout@v3
Expand All @@ -19,33 +24,33 @@ jobs:
- name: build_bangc_ops
run: >
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
docker run --rm -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./build.sh --sub_module=bangc
- name: bangc_ops_release_test_cases
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_test/default_platform
- name: bangc_ops_release_temp_cases
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/default_platform
- name: test_bangc_ops_release_test_370_cases
if: matrix.runner == 'mlu370-m8'
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_test/370
- name: test_bangc_ops_release_temp_370_cases
if: matrix.runner == 'mlu370-m8'
run: >
docker run --rm --device /dev/cambricon_ctl --device /dev/cambricon_dev0 --device /dev/commu0
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:v0.4.0-devel-x86_64-ubuntu18.04
-v /testdata:/testdata -v $(pwd):/work -w /work docker-user.gotgo.cc:30080/mlu-ops/mluops_ci:${{matrix.mlu_ops_version}}-devel-x86_64-ubuntu18.04-${{matrix.cntoolkit_version}}
./test.sh --sub_module=bangc --cases_dir=/testdata/release_temp/370
- name: clean
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,6 @@ cython_debug/
dep_libs_extract/
package/
test_workspace/
daily.software.cambricon.com/
dependency.txt
bangc-ops/symbol_visibility.map
Binary file removed bangc-ops/kernels/copy/aarch64/copy_union1.mlu.o
Binary file not shown.
Binary file not shown.
Binary file modified bangc-ops/kernels/copy/x86_64/copy_union1.mlu.o
Binary file not shown.
Binary file modified bangc-ops/kernels/copy/x86_64/copy_with_stride_union1.mlu.o
Binary file not shown.
Binary file not shown.
Binary file not shown.
61 changes: 30 additions & 31 deletions bangc-ops/kernels/expand/expand.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,18 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
PARAM_CHECK("[mluOpExpand]", input != NULL);
PARAM_CHECK("[mluOpExpand]", output != NULL);

uint64_t dims_input[MLUOP_DIM_MAX];
uint64_t dims_output[MLUOP_DIM_MAX];
uint64_t redims_input[MLUOP_DIM_MAX + 1];
uint64_t redims_output[MLUOP_DIM_MAX + 1];
int32_t dims_input[MLUOP_DIM_MAX];
int32_t dims_output[MLUOP_DIM_MAX];
int32_t redims_input[MLUOP_DIM_MAX + 1];
int32_t redims_output[MLUOP_DIM_MAX + 1];
int32_t count_flag = 0;
int32_t count_index[MLUOP_DIM_MAX + 1];

int fix_num = 0;
int32_t fix_num = 0;
size_t input_size = input_num;

// Reshape dims: A(a, b, c) ---> A(1, 1, 1, 1, 1, a, b, c, 1)
for (int i = 0; i < MLUOP_DIM_MAX; i++) {
for (int32_t i = 0; i < MLUOP_DIM_MAX; ++i) {
dims_input[i] = 1;
dims_output[i] = 1;
redims_input[i] = 1;
Expand All @@ -93,19 +93,23 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
redims_input[MLUOP_DIM_MAX] = 1;
redims_output[MLUOP_DIM_MAX] = 1;

for (int i = 0; i < input_desc->dim; i++) {
for (int32_t i = 0; i < input_desc->dim; ++i) {
dims_input[MLUOP_DIM_MAX - i - 1] =
input_desc->dims[input_desc->dim - i - 1];
}
for (int i = 0; i < output_desc->dim; i++) {
for (int32_t i = 0; i < output_desc->dim; ++i) {
dims_output[MLUOP_DIM_MAX - i - 1] =
output_desc->dims[output_desc->dim - i - 1];
}
while (dims_output[MLUOP_DIM_MAX - 1 - fix_num] == 1) {
fix_num++;
for (int i = 0; i < MLUOP_DIM_MAX; ++i) {
if (dims_output[MLUOP_DIM_MAX - 1 - i] != 1) {
break;
} else {
fix_num++;
}
}

for (int i = 0; i < MLUOP_DIM_MAX; i++) {
for (int32_t i = 0; i < MLUOP_DIM_MAX; ++i) {
if (dims_output[i] % dims_input[i] != 0) {
LOG(ERROR) << "[mluOpExpand] In expand dimension, the size of output"
<< " should be times of the size of input. But now in expand "
Expand All @@ -117,7 +121,7 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
}

// Reshape: dims(1, A, 1, 1, B, 1) change to redims(A, 1, B)
for (int i = MLUOP_DIM_MAX - 1, j = fix_num; i - j >= 0; i--) {
for (int32_t i = MLUOP_DIM_MAX - 1, j = fix_num; i - j >= 0; --i) {
redims_input[i] = dims_input[i - j];
redims_output[i] = dims_output[i - j];
while ((i - j) > 0 && dims_input[i - j] == 1 &&
Expand All @@ -129,7 +133,7 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,

size_t output_size = output_num;
// Count how many dims need to expand.
for (int i = 0; i < MLUOP_DIM_MAX + 1; i++) {
for (int32_t i = 0; i < MLUOP_DIM_MAX + 1; ++i) {
count_index[i] = 0;
if (redims_input[i] != redims_output[i]) {
count_flag += 1;
Expand Down Expand Up @@ -162,7 +166,7 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
cnrtFunctionType_t k_type;

k_type = CNRT_FUNC_TYPE_UNION1;
int core_dim = mluop::runtime::getCoreNumOfEachUnionCapability(handle);
int32_t core_dim = mluop::runtime::getCoreNumOfEachUnionCapability(handle);
int32_t union_number = mluop::runtime::getClusterLimitCapability(handle);
k_dim.x = core_dim;
k_dim.y = union_number;
Expand All @@ -179,14 +183,14 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
}

if (count_flag == 1) {
uint64_t high_num = 1;
uint64_t expand_num =
int32_t high_num = 1;
int32_t expand_num =
redims_output[count_index[0]] / redims_input[count_index[0]];
uint64_t low_num = 1;
for (int i = 0; i < count_index[0]; i++) {
int32_t low_num = 1;
for (int32_t i = 0; i < count_index[0]; ++i) {
high_num *= redims_output[i];
}
for (int i = count_index[0] + 1; i < MLUOP_DIM_MAX + 1; i++) {
for (int32_t i = count_index[0] + 1; i < MLUOP_DIM_MAX + 1; ++i) {
low_num *= redims_output[i];
}
if (redims_input[count_index[0]] != 1) {
Expand All @@ -196,25 +200,20 @@ mluOpExpand(mluOpHandle_t handle, const mluOpTensorDescriptor_t input_desc,
<< k_type / CORE_DIM << ", " << k_dim.x << ", " << k_dim.y << ", "
<< k_dim.z << ">>>";
KERNEL_CHECK((mluOpUnion1KernelExpandOneDim(
k_dim, k_type, handle->queue, (void *)input, output, (uint32_t)high_num,
(uint32_t)expand_num, (uint32_t)low_num,
mluOpDataTypeBytes(data_type))));
k_dim, k_type, handle->queue, (void *)input, (void *)output, high_num,
expand_num, low_num, mluOpDataTypeBytes(data_type))));
} else {
INTERNAL_CHECK("mluOpExpand",
MLUOP_STATUS_SUCCESS == policyFunc(handle, &k_dim, &k_type));
VLOG(5) << "Launch Kernel MLUUnion1KernelExpandTensor<<<Union"
<< k_type / CORE_DIM << ", " << k_dim.x << ", " << k_dim.y << ", "
<< k_dim.z << ">>>";
KERNEL_CHECK((mluOpUnion1KernelExpandTensor(
k_dim, k_type, handle->queue, (void *)input, output,
(uint32_t)dims_input[0], (uint32_t)dims_input[1],
(uint32_t)dims_input[2], (uint32_t)dims_input[3],
(uint32_t)dims_input[4], (uint32_t)dims_input[5],
(uint32_t)dims_input[6], (uint32_t)dims_input[7],
(uint32_t)dims_output[0], (uint32_t)dims_output[1],
(uint32_t)dims_output[2], (uint32_t)dims_output[3],
(uint32_t)dims_output[4], (uint32_t)dims_output[5],
(uint32_t)dims_output[6], (uint32_t)dims_output[7],
k_dim, k_type, handle->queue, (void *)input, (void *)output,
dims_input[0], dims_input[1], dims_input[2], dims_input[3],
dims_input[4], dims_input[5], dims_input[6], dims_input[7],
dims_output[0], dims_output[1], dims_output[2], dims_output[3],
dims_output[4], dims_output[5], dims_output[6], dims_output[7],
mluOpDataTypeBytes(input_desc->dtype))));
}

Expand Down
Loading

0 comments on commit c2295c8

Please sign in to comment.