[Feature](bangc-ops): add concat operator.
zhengleiZL committed Oct 19, 2023
1 parent 88c2fa7 commit 02d02fd
Showing 10 changed files with 665 additions and 201 deletions.
41 changes: 41 additions & 0 deletions bangc-ops/kernels/concat/concat.cpp
@@ -0,0 +1,41 @@
/*************************************************************************
* Copyright (C) [2023] by Cambricon, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "kernels/kernel_wrapper/wrapper.h"

mluOpStatus_t MLUOP_WIN_API mluOpConcat(
mluOpHandle_t handle,
const int concat_num,
const int axis,
const mluOpTensorDescriptor_t inputs_desc[],
const void *const inputs[],
void *workspace,
size_t workspace_size,
const mluOpTensorDescriptor_t output_desc,
void *output) {
ConcatWrapper wrapper;
mluOpStatus_t ret = wrapper.invoke(handle, concat_num, axis, inputs_desc,
inputs, workspace, workspace_size,
output_desc, output);
return ret;
}

Binary file modified bangc-ops/kernels/kernel_wrapper/lib/libextops.a
100644 → 100755
Binary file not shown.
358 changes: 158 additions & 200 deletions bangc-ops/kernels/kernel_wrapper/wrapper.h

Large diffs are not rendered by default.

150 changes: 150 additions & 0 deletions bangc-ops/mlu_op.h
@@ -15293,6 +15293,156 @@ mluOpStridedSlice(mluOpHandle_t handle,
const mluOpTensorDescriptor_t output_desc,
void *output);

// Group:Concat
/*!
* @brief Concatenates the list of input tensors \b inputs along the given dimension \b axis.
*
* @param[in] handle
* Handle to a Cambricon MLUOP context that is used to manage MLU devices and queues in
* ::mluOpConcat operation. For detailed information, see ::mluOpHandle_t.
* @param[in] concat_num
 * Number of tensors to be concatenated.
* @param[in] axis
* Dimension along which to be concatenated. The value must be in the range of [-rank, rank),
* where rank is the number of dimensions in the input tensors,
* and negative \b axis refers to ``axis + rank``.
* @param[in] inputs_desc
* The list of descriptors of input tensors. For detailed information,
* see ::mluOpTensorDescriptor_t.
* @param[in] inputs
 * A host pointer to a list of MLU pointers, each of which points to the MLU memory that stores
 * one of the input tensors.
* @param[in] workspace
* Pointer to the MLU memory that is used as an extra workspace for the concat operation.
* For more information about workspace, see "Cambricon BANG C OPS User Guide". Because ::mluOpConcat
* does not need extra workspace, the \b workspace can be set to NULL.
* @param[in] workspace_size
 * The size of the extra workspace, in bytes, needed by the concat operation. You can get the
 * size of the workspace with the ::mluOpGetConcatWorkspaceSize function. Because ::mluOpConcat
 * does not need extra workspace, \b workspace_size can be set to 0.
* @param[in] output_desc
* The descriptor of the output tensor. For detailed information,
* see ::mluOpTensorDescriptor_t.
* @param[out] output
* Pointer to the MLU memory that stores the output tensor.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM, ::MLUOP_STATUS_ALLOC_FAILED
*
* @par Formula
* - See "Concat Operator" section in "Cambricon MLUOP User Guide" for details.
*
* @par Data Type
 * - This function supports the following byte-width data types for the \b input and \b output
 *   tensors. The byte width of a data type can be obtained with the ::mluOpGetSizeOfDataType
 *   function.
 * <b>Note that all the tensors must have the same data type. If the tensors have a fixed-point
 * data type, the quantization parameters of all the tensors should be the same.</b>
* - The supported byte-width data types are as follows:
* - input tensor: 1-byte, 2-byte, 4-byte, 8-byte.
* - output tensor: 1-byte, 2-byte, 4-byte, 8-byte.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - The parameters must meet the following requirements:
* - The parameter \b concat_num should be greater than 0.
 * - All tensors, including the inputs and the output, must have the same number of dimensions.
 * - For every dimension other than \b axis, all tensors must have the same size, and the size
 *   of the output on \b axis must equal the sum of the sizes of the inputs on \b axis.
*
* @par API Dependency
 * - Before calling this function, you need to call ::mluOpGetConcatWorkspaceSize to get the size
 *   of the extra workspace needed by the concat operation.
*
* @par Note
* - None.
*
* @par Requirements
* - None.
*
* @par Example
* - The example of concat operation is as follows:
* @verbatim
input: 3 tensors with the shapes of 2 * 3, 2 * 3 and 1 * 3, respectively
--> [[1,2,3],[4,5,6]]
--> [[7,8,9],[10,11,12]]
--> [[13,14,15]]

concat_num: 3

axis: 0

Then we will get the output:

output: a tensor of 5 * 3 --> [[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]]
@endverbatim
*
* @par Reference
* - http://www.tensorflow.org/api_docs/python/tf/concat
*/
mluOpStatus_t MLUOP_WIN_API
mluOpConcat(mluOpHandle_t handle,
const int concat_num,
const int axis,
const mluOpTensorDescriptor_t inputs_desc[],
const void *const inputs[],
void *workspace,
size_t workspace_size,
const mluOpTensorDescriptor_t output_desc,
void *output);
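
The following is a hedged, host-side sketch of how the documented example could be driven through this API; it is not part of the commit. It assumes an already initialized mluOpHandle_t and float device buffers d_in0 (2 x 3), d_in1 (2 x 3), d_in2 (1 x 3), and d_out (5 x 3) that the caller has allocated and filled (for instance with the CNRT runtime); error checking is omitted for brevity.

#include "mlu_op.h"

mluOpStatus_t ConcatExample(mluOpHandle_t handle, void *d_in0, void *d_in1,
                            void *d_in2, void *d_out) {
  const int concat_num = 3;
  const int axis = 0;
  const int dims0[2] = {2, 3}, dims1[2] = {2, 3}, dims2[2] = {1, 3};
  const int dims_out[2] = {5, 3};

  // Describe the three inputs and the output as 2-D float tensors.
  mluOpTensorDescriptor_t inputs_desc[3], output_desc;
  for (int i = 0; i < 3; ++i) {
    mluOpCreateTensorDescriptor(&inputs_desc[i]);
  }
  mluOpCreateTensorDescriptor(&output_desc);
  mluOpSetTensorDescriptor(inputs_desc[0], MLUOP_LAYOUT_ARRAY, MLUOP_DTYPE_FLOAT, 2, dims0);
  mluOpSetTensorDescriptor(inputs_desc[1], MLUOP_LAYOUT_ARRAY, MLUOP_DTYPE_FLOAT, 2, dims1);
  mluOpSetTensorDescriptor(inputs_desc[2], MLUOP_LAYOUT_ARRAY, MLUOP_DTYPE_FLOAT, 2, dims2);
  mluOpSetTensorDescriptor(output_desc, MLUOP_LAYOUT_ARRAY, MLUOP_DTYPE_FLOAT, 2, dims_out);

  // Query the workspace size (currently always 0 for concat).
  size_t workspace_size = 0;
  mluOpGetConcatWorkspaceSize(handle, concat_num, &workspace_size);

  // Concatenate along axis 0; with the example inputs above, the output is
  // [[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15]].
  const void *const inputs[3] = {d_in0, d_in1, d_in2};
  mluOpStatus_t status = mluOpConcat(handle, concat_num, axis, inputs_desc, inputs,
                                     /*workspace=*/NULL, workspace_size,
                                     output_desc, d_out);

  for (int i = 0; i < 3; ++i) {
    mluOpDestroyTensorDescriptor(inputs_desc[i]);
  }
  mluOpDestroyTensorDescriptor(output_desc);
  return status;
}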

// Group:Concat
/*!
* @brief Returns in \b size the size of the MLU memory that is used as an extra workspace to
* optimize the concat operation.
*
* @param[in] handle
* Handle to a Cambricon MLUOP context that is used to manage MLU devices and queues in
* ::mluOpConcat operation. For detailed information, see ::mluOpHandle_t.
* @param[in] concat_num
 * Number of tensors to be concatenated.
* @param[out] size
 * A host pointer to the returned size, in bytes, of the extra workspace used in the
 * ::mluOpConcat operation. At present, ::mluOpConcat does not need extra workspace, so
 * \b size is returned as 0.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Formula
* - None.
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - The parameters must meet the following requirements:
* - The parameter \b concat_num should be greater than 0.
*
* @par API Dependency
 * - The workspace of the size returned by this function should be allocated and passed to the
 *   ::mluOpConcat function to perform the concat operation.
*
 * @par Note
* - None.
*
* @par Requirements
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpGetConcatWorkspaceSize(mluOpHandle_t handle, const int concat_num, size_t *size);
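
For completeness, a short sketch (also not part of this commit) of the usual query-then-allocate pattern that ties the two functions together. It assumes handle, concat_num, axis, inputs_desc, inputs, output_desc, and d_out are set up as in the sketch above, and that cnrtMalloc/cnrtFree from the CNRT runtime are available for device allocation; because the returned size is currently 0, the allocation branch is skipped in practice.

// Hypothetical workspace pattern; cnrtMalloc/cnrtFree are assumed to be available.
size_t workspace_size = 0;
mluOpGetConcatWorkspaceSize(handle, concat_num, &workspace_size);

void *workspace = NULL;
if (workspace_size > 0) {
  cnrtMalloc(&workspace, workspace_size);  // allocate device workspace if ever needed
}
mluOpConcat(handle, concat_num, axis, inputs_desc, inputs,
            workspace, workspace_size, output_desc, d_out);
if (workspace != NULL) {
  cnrtFree(workspace);
}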

#if defined(__cplusplus)
}
#endif
2 changes: 1 addition & 1 deletion bangc-ops/test/mlu_op_gtest/pb_gtest/mlu_op_test_proto
120 changes: 120 additions & 0 deletions bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/concat/concat.cpp
@@ -0,0 +1,120 @@
/*************************************************************************
* Copyright (C) [2023] by Cambricon, Inc.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "concat.h"

#include <memory>

namespace mluoptest {

void ConcatExecutor::paramCheck() {
assert(parser_->getInputNum() > 0);
assert(parser_->getOutputNum() == 1);
if (!parser_->getProtoNode()->has_concat_param()) {
LOG(ERROR) << "Lose concat param. ";
}
}

void ConcatExecutor::workspaceMalloc() {
input_num_ = parser_->getInputNum();
MLUOP_CHECK(
mluOpGetConcatWorkspaceSize(handle_, input_num_, &workspace_size_));
VLOG(4) << "Malloc workspace space.";
void *temp = mlu_runtime_.allocate(workspace_size_);
workspace_.push_back(temp);
VLOG(4) << "Malloc addr: " << temp << " , size: " << workspace_size_;

eva_->setMluWorkspaceSize(workspace_size_);
}

void ConcatExecutor::compute() {
VLOG(4) << "ConcatExecutor compute ";
if (!parser_->getProtoNode()->has_concat_param()) {
LOG(ERROR) << "Lose concat param. ";
}
axis_ = parser_->getProtoNode()->concat_param().axis();

std::vector<void *> pdev_input_h(input_num_);
for (int i = 0; i < input_num_; i++) {
pdev_input_h[i] = data_vector_[i].device_ptr;
}

mluOpTensorDescriptor_t *in_desc =
cpu_runtime_.allocate(new mluOpTensorDescriptor_t[input_num_]);
for (int i = 0; i < input_num_; i++) {
in_desc[i] = tensor_desc_[i].tensor;
}
auto out_desc = tensor_desc_[input_num_].tensor;

VLOG(4) << "call mluOpconcatTensor()";
interface_timer_.start();
MLUOP_CHECK(mluOpConcat(handle_, input_num_, axis_, in_desc,
pdev_input_h.data(), workspace_[0], workspace_size_,
out_desc, data_vector_[input_num_].device_ptr));
interface_timer_.stop();

if (in_desc) {
cpu_runtime_.deallocate(in_desc);
in_desc = nullptr;
}
}

void ConcatExecutor::workspaceFree() {
VLOG(4) << "Free device workspace space.";
if (workspace_[0] != nullptr) {
mlu_runtime_.deallocate(workspace_[0]);
}
}

void ConcatExecutor::cpuConcat(std::vector<TensorPair> input_desc,
                               std::vector<float *> input, int input_num,
                               int axis_t, float *output) {
  int dim_num = input_desc[0].tensor->dim;
  // Normalize a negative axis into [0, dim_num).
  size_t axis = axis_t < 0 ? axis_t + dim_num : axis_t;
  // high_size: number of "rows" before the concat axis; each row is filled
  // from every input in turn.
  size_t high_size = 1;
  for (size_t i = 0; i < axis; i++) {
    high_size *= input_desc[0].tensor->dims[i];
  }
  // low_low_size: number of elements per slice after the concat axis.
  size_t low_low_size = 1;
  for (size_t i = dim_num - 1; i > axis; i--) {
    low_low_size *= input_desc[0].tensor->dims[i];
  }
  // low_sizes[i]: contiguous elements contributed by input i per row.
  size_t *low_sizes = cpu_runtime_.allocate(new size_t[input_num]);
  for (size_t i = 0; i < input_num; i++) {
    low_sizes[i] = input_desc[i].tensor->dims[axis] * low_low_size;
  }

  // Interleave the inputs row by row into the output buffer.
  size_t offset = 0;
  for (size_t j = 0; j < high_size; j++) {
    for (size_t i = 0; i < input_num; i++) {
      memcpy(output + offset, input[i] + j * low_sizes[i],
             low_sizes[i] * sizeof(float));
      offset += low_sizes[i];
    }
  }
  cpu_runtime_.deallocate(low_sizes);
}

void ConcatExecutor::cpuCompute() {
assert(parser_->getInputNum() > 0);
assert(parser_->getOutputNum() == 1);

cpuConcat(tensor_desc_, cpu_fp32_input_, input_num_, axis_,
cpu_fp32_output_[0]);
}

int64_t ConcatExecutor::getTheoryOps() {
int64_t theory_ops = parser_->getOutputDataCount(0);
VLOG(4) << "getTheoryOps: " << theory_ops << " ops";
return theory_ops;
}

} // namespace mluoptest
40 changes: 40 additions & 0 deletions bangc-ops/test/mlu_op_gtest/pb_gtest/src/zoo/concat/concat.h
@@ -0,0 +1,40 @@
/*************************************************************************
* Copyright (C) [2023] by Cambricon, Inc.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef TEST_MLU_OP_GTEST_SRC_ZOO_CONCAT_CONCAT_H_
#define TEST_MLU_OP_GTEST_SRC_ZOO_CONCAT_CONCAT_H_
#include <vector>
#include "executor.h"

namespace mluoptest {

class ConcatExecutor : public Executor {
public:
ConcatExecutor() {}
~ConcatExecutor() {}

void paramCheck();
void workspaceMalloc();
void compute();
void workspaceFree();
void cpuCompute();
void cpuConcat(std::vector<TensorPair> input_desc, std::vector<float *> input,
int input_num, int axis_t, float *output);
int64_t getTheoryOps() override;

private:
int axis_;
int input_num_;
size_t workspace_size_;
};

} // namespace mluoptest
#endif // TEST_MLU_OP_GTEST_SRC_ZOO_CONCAT_CONCAT_H_
