Add ONNXRuntime backend support for quantize model (#782)
* add onnxruntime backend support for quantize model

* add mul support

* update mergeconvadd

* fix bug in mergeconvadd and mergeconvbn

* Add func note

* add mnist quantize model test

* rerun_ci

* fix test

* update code

* remove useless code

* fix bugs

* add more quantize algo test

* rerun_ci

* add GetTensorValue in helper

* fix test

* add new type support

* fix GetTensorValue in onnx_helper

* fix ops and change name func

* fix quantize_linear op
yeliang2258 authored Jun 21, 2022
1 parent d5834db commit 354f9b3
Showing 12 changed files with 1,315 additions and 113 deletions.
16 changes: 9 additions & 7 deletions paddle2onnx/mapper/exporter.cc
@@ -41,14 +41,19 @@ void ModelExporter::UpdateParameters(
     const std::map<std::string, Weight>& params) {
   for (auto& item : params) {
     auto node = MakeConstant(item.first, item.second);
+    bool updated = false;
     for (int i = 0; i < parameters.size(); ++i) {
       auto old_node = parameters[i];
       if (old_node->output(0) == item.first) {
         parameters.erase(parameters.begin() + i);
         parameters.push_back(std::move(node));
+        updated = true;
         break;
       }
     }
+    if (!updated) {
+      parameters.push_back(std::move(node));
+    }
   }
 }

@@ -281,10 +286,6 @@ std::string ModelExporter::Run(const PaddleParser& parser, int opset_version,
     }
     ExportOp(parser, &_helper, opset_version, 0, i, verbose);
   }
-  if (parser.is_quantized_model) {
-    // Update int8 weights in quantized OP to float32
-    UpdateParameters(_helper.updated_params);
-  }
 
   // construct a onnx model proto
   auto model = std::make_shared<ONNX_NAMESPACE::ModelProto>();
@@ -300,9 +301,10 @@ std::string ModelExporter::Run(const PaddleParser& parser, int opset_version,
   ProcessGraphDumplicateNames(&parameters, &inputs, &outputs, &_helper.nodes);
   if (parser.is_quantized_model) {
     quantize_model_processer.ProcessQuantizeModel(
-        &parameters, &inputs, &outputs, &_helper.nodes, _helper,
-        "others");  // TODO(yeliang): set ONNXRuntime as the default deploy
-                    // backend
+        &parameters, &inputs, &outputs, &_helper.nodes, &_helper, "onnxruntime",
+        parser);
+    // Update int8 weights in quantized OP to float32
+    UpdateParameters(_helper.updated_params);
   }
   // RemoveIsolatedNodes(&parameters, &inputs, &outputs, &_helper.nodes);
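Two behavioral notes on this file: UpdateParameters is now called after ProcessQuantizeModel, so the float32 weights that the quantize pass collects in _helper.updated_params replace the original int8 initializers, and the deploy backend passed to ProcessQuantizeModel is now "onnxruntime" rather than the "others" placeholder. The new updated flag also makes UpdateParameters append a parameter when no existing node matches its output name, where the old loop silently dropped it.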
10 changes: 3 additions & 7 deletions paddle2onnx/mapper/nn/conv2d.cc
@@ -49,11 +49,9 @@ void Conv2dMapper::Opset7() {
   auto kernel_info = GetInput("Filter");
   auto input_info = GetInput("Input");
   auto output_info = GetOutput("Output");
-  auto input = helper_->AutoCast(input_info[0].name, input_info[0].dtype,
-                                 P2ODataType::FP32);
-  auto kernel = helper_->AutoCast(kernel_info[0].name, kernel_info[0].dtype,
-                                  P2ODataType::FP32);
-  auto node = helper_->MakeNode("Conv", {input, kernel});
+
+  auto node = helper_->MakeNode(
+      "Conv", {input_info[0].name, kernel_info[0].name}, {output_info[0].name});
   AddAttribute(node, "dilations", dilations_);
   std::vector<int64_t> kernel_shape = {kernel_info[0].shape[2],
                                        kernel_info[0].shape[3]};
@@ -78,8 +76,6 @@ void Conv2dMapper::Opset7() {
     }
     AddAttribute(node, "pads", paddings);
   }
-  helper_->AutoCast(node->output(0), output_info[0].name, P2ODataType::FP32,
-                    output_info[0].dtype);
 }
 
 }  // namespace paddle2onnx
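With the AutoCast calls removed, the Conv node reads Input/Filter and writes Output under their original names and dtypes. For quantized models this keeps tensor names stable so the quantize processor can wrap them in QuantizeLinear/DequantizeLinear pairs without stray Cast nodes in between (my reading of the change; the commit message does not spell this out).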
61 changes: 61 additions & 0 deletions paddle2onnx/mapper/onnx_helper.h
@@ -84,6 +84,9 @@ class OnnxHelper {
 
   int32_t GetOpsetVersion() { return opset_version; }
 
+  template <typename T>
+  bool TryGetTensorValue(const std::string& name, std::vector<T>* value);
+
   std::shared_ptr<ONNX_NAMESPACE::NodeProto> MakeNode(
       const std::string& op_type, const std::vector<std::string>& inputs,
       const std::vector<std::string>& outputs);
@@ -489,4 +492,62 @@ std::string OnnxHelper::Assign(
   return Assign(output, dtype, shape, value);
 }
 
+template <typename T>
+bool OnnxHelper::TryGetTensorValue(const std::string& name,
+                                   std::vector<T>* value) {
+  for (auto iter = nodes.begin(); iter != nodes.end(); iter++) {
+    auto node = *iter;
+    if (node->op_type() != "Constant") {
+      continue;
+    }
+    if (node->output(0) == name) {
+      for (auto i = 0; i < node->attribute_size(); i++) {
+        auto attr = node->attribute(i);
+        if (attr.name() == "value") {
+          auto tensor = attr.mutable_t();
+          auto dtype = tensor->data_type();
+          std::vector<int64_t> shape;
+          for (int64_t j = 0; j < tensor->dims_size(); j++) {
+            shape.push_back(tensor->dims(j));
+          }
+          int64_t nums = 1;
+          for (auto& dim : shape) nums *= dim;
+          value->resize(nums);
+          // Copy the raw payload into a buffer of the tensor's own
+          // dtype, then convert element-wise to T.
+          if (dtype == ONNX_NAMESPACE::TensorProto::INT64) {
+            std::vector<int64_t> val(nums, 0);
+            memcpy(val.data(), tensor->raw_data().data(),
+                   nums * sizeof(int64_t));
+            value->assign(val.begin(), val.end());
+            return true;
+          } else if (dtype == ONNX_NAMESPACE::TensorProto::INT32) {
+            std::vector<int32_t> val(nums, 0);
+            memcpy(val.data(), tensor->raw_data().data(),
+                   nums * sizeof(int32_t));
+            value->assign(val.begin(), val.end());
+            return true;
+          } else if (dtype == ONNX_NAMESPACE::TensorProto::FLOAT) {
+            std::vector<float> val(nums, 0);
+            memcpy(val.data(), tensor->raw_data().data(),
+                   nums * sizeof(float));
+            value->assign(val.begin(), val.end());
+            return true;
+          } else if (dtype == ONNX_NAMESPACE::TensorProto::DOUBLE) {
+            std::vector<double> val(nums, 0);
+            memcpy(val.data(), tensor->raw_data().data(),
+                   nums * sizeof(double));
+            value->assign(val.begin(), val.end());
+            return true;
+          } else {
+            P2OLogger() << "[WARNING] OnnxHelper::TryGetTensorValue only "
+                           "supports reading int64_t/int32_t/float/double "
+                           "values from a Constant node for now."
+                        << std::endl;
+            return false;
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
 }  // namespace paddle2onnx
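A minimal usage sketch for the new helper; the mapper context and the tensor name "Scale" are assumptions for illustration, not code from this commit:

  // Hypothetical call site inside a mapper (illustrative only):
  std::vector<float> scale_value;
  if (helper_->TryGetTensorValue("Scale", &scale_value)) {
    // The Constant node's payload is now in scale_value, converted
    // element-wise to float whether it was stored as int32, int64,
    // float, or double.
  } else {
    // "Scale" is not produced by a Constant node (or holds an
    // unsupported dtype); fall back to handling it symbolically.
  }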
18 changes: 12 additions & 6 deletions paddle2onnx/mapper/quantize/dequantize_linear.cc
@@ -132,13 +132,19 @@ void DequantizeLinearMapper::Opset10() {
   for (auto &i : scales) {
     onnx_scales.push_back(i / 127);
   }
-
-  auto scale_node =
-      helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales);
-
   std::vector<int64_t> onnx_zeros(onnx_scales.size(), 0);
-  auto zero_node =
-      helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros);
+  std::string scale_node, zero_node;
+  if (onnx_zeros.size() == 1) {
+    scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT,
+                                   onnx_scales[0]);
+    zero_node =
+        helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros[0]);
+  } else {
+    scale_node =
+        helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales);
+    zero_node =
+        helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros);
+  }
 
   std::vector<float> weight;
   TryGetInputValue("X", &weight);
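Why the division by 127: Paddle's quantization passes record a max-abs range per tensor or channel, while ONNX's DequantizeLinear works with the real-valued step between adjacent int8 levels. Reading the code, the two conventions line up as follows (a reconstruction from the surrounding code, not spelled out in the commit):

\[
q = \operatorname{round}\!\left(\frac{127\,x}{s}\right) \quad\text{(Paddle, } s = \text{max-abs range)},
\qquad
q = \operatorname{round}\!\left(\frac{x}{\text{scale}}\right) + \text{zero\_point} \quad\text{(ONNX)},
\]
\[
\Rightarrow\quad \text{scale} = \frac{s}{127}, \qquad \text{zero\_point} = 0.
\]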
Empty file modified: paddle2onnx/mapper/quantize/dequantize_linear.h (mode changed 100644 → 100755).
22 changes: 17 additions & 5 deletions paddle2onnx/mapper/quantize/quantize_linear.cc (file mode changed 100755 → 100644)
@@ -31,6 +31,10 @@ int32_t QuantizeLinearMapper::GetMinOpset(bool verbose) {
     Error() << "Only support bit_length = 8." << std::endl;
     return -1;
   }
+  if (round_type_ != 0) {
+    Error() << "The round_type attr of quantize_linear must be 0." << std::endl;
+    return -1;
+  }
   if (scales.size() > 1) {
     auto x_info = GetInput("X");
     if (x_info[0].shape[quant_axis_] != scales.size()) {
@@ -56,13 +60,21 @@ void QuantizeLinearMapper::Opset10() {
   for (auto i : scales) {
     onnx_scales.push_back(i / 127);
   }
+  std::vector<int64_t> onnx_zeros(onnx_scales.size(), 0);
 
-  auto scale_node =
-      helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales);
+  std::string scale_node, zero_node;
+  if (onnx_scales.size() == 1) {
+    scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT,
+                                   onnx_scales[0]);
+    zero_node =
+        helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros[0]);
+  } else {
+    scale_node =
+        helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales);
+    zero_node =
+        helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros);
+  }
 
-  std::vector<int64_t> onnx_zeros(onnx_scales.size(), 0);
-  auto zero_node =
-      helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros);
   auto node = helper_->MakeNode("QuantizeLinear",
                                 {x_info[0].name, scale_node, zero_node},
                                 {GetOutput("Y")[0].name});
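The scalar-versus-1-D split mirrors the one in dequantize_linear.cc above: for per-tensor quantization the ONNX operator schema types y_scale and y_zero_point as rank-0 scalars, which ONNX Runtime checks more strictly than the 1-element 1-D constants this code emitted before; the 1-D form remains for the per-channel case. (This rationale is inferred from the change, not stated in the commit.)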
5 changes: 5 additions & 0 deletions paddle2onnx/mapper/quantize/quantize_linear.h
@@ -27,12 +27,17 @@ class QuantizeLinearMapper : public Mapper {
     if (quant_axis_ == -1) {
       quant_axis_ = 1;
     }
+    if (HasAttr("round_type")) {
+      GetAttr("round_type", &round_type_);
+    }
   }
 
   int32_t GetMinOpset(bool verbose = false);
   void Opset10();
 
  private:
+  int64_t round_type_ = -1;  // 0: round to nearest, ties to even.
+                             // 1: round to nearest, ties away from zero.
   int64_t quant_axis_ = 1;
   int64_t bit_length_ = 8;
 };
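Context for the round_type check added in GetMinOpset above: ONNX's QuantizeLinear specifies round-half-to-even, which matches Paddle's round_type == 0; ties-away-from-zero (round_type == 1) has no direct ONNX counterpart, so such models are rejected. A standalone C++ illustration of the two tie-breaking rules (not code from this repository):

  #include <cmath>
  #include <cstdio>

  int main() {
    // Ties to even (ONNX QuantizeLinear; assumes the default
    // FE_TONEAREST floating-point rounding mode):
    std::printf("%.0f %.0f\n", std::nearbyint(2.5), std::nearbyint(3.5));  // 2 4
    // Ties away from zero (Paddle round_type == 1):
    std::printf("%.0f %.0f\n", std::round(2.5), std::round(3.5));          // 3 4
    return 0;
  }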
(Diffs for the remaining 5 changed files are not shown.)
