Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transposed convolution improvements #160

Merged
merged 2 commits into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ARM.CMSIS-NN.pdsc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
<file category="source" name="Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_wrapper_s8.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_x.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_w.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_y.c"/>
Expand Down Expand Up @@ -121,6 +122,7 @@
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nntables.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_transpose_conv_row_s8_s32.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s4.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s4.c"/>
Expand Down
83 changes: 75 additions & 8 deletions Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 23 October 2024
* $Revision: V.17.3.0
* $Date: 04 November 2024
* $Revision: V.18.0.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -415,6 +415,8 @@ arm_cmsis_nn_status arm_convolve_even_s4(const cmsis_nn_context *ctx,
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] upscale_dims Inserts zeroes to upscale the input in h/w dimensions if set to 2. This is used for
* transposed convolution.
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8
*
Expand All @@ -436,6 +438,7 @@ arm_cmsis_nn_status arm_convolve_s8(const cmsis_nn_context *ctx,
const int8_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *upscale_dims,
const cmsis_nn_dims *output_dims,
int8_t *output_data);

Expand All @@ -461,6 +464,54 @@ int32_t arm_convolve_s4_get_buffer_size(const cmsis_nn_dims *input_dims, const c
*/
int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);

/**
* @brief Wrapper to select optimal transposed convolution algorithm depending on parameters.
* @param[in, out] ctx Function context that contains the additional buffer if required by the
* function.
* arm_transpose_conv_s8_get_buffer_size will return the buffer_size if required.
* The caller is expected to clear the buffer, if applicable, for security
* reasons.
* @param[in, out] output_ctx Temporary scratch buffer.
* The required size is: output width * output height * output channel * 4
* The caller is expected to clear the buffer, if applicable, for security
* reasons.
* @param[in] transpose_conv_params Convolution parameters (e.g. strides, dilations, pads,...).
* Range of transpose_conv_params->input_offset : [-127, 128]
* Range of transpose_conv_params->output_offset : [-128, 127]
* @param[in] quant_params Per-channel quantization info.
* It contains the multiplier and shift values to be applied to each out channel.
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int8
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
* spatial filter dimensions
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8

* @return The function returns either
* <code>ARM_CMSIS_NN_ARG_ERROR</code> if argument constraints fail. or,
* <code>ARM_CMSIS_NN_SUCCESS</code> on successful completion.
*
* @details
* 1. Supported framework: TensorFlow Lite micro
* 2. Additional memory is required for optimization. Refer to arguments 'ctx' and 'output_ctx' for details.
*
*/
arm_cmsis_nn_status arm_transpose_conv_wrapper_s8(const cmsis_nn_context *ctx,
const cmsis_nn_context *output_ctx,
const cmsis_nn_transpose_conv_params *transpose_conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const int8_t *input_data,
const cmsis_nn_dims *filter_dims,
const int8_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int32_t *bias_data,
const cmsis_nn_dims *output_dims,
int8_t *output_data);

/**
* @brief Basic s8 transpose convolution function
* @param[in, out] ctx Function context that contains the additional buffer if required by the
Expand Down Expand Up @@ -510,19 +561,35 @@ arm_cmsis_nn_status arm_transpose_conv_s8(const cmsis_nn_context *ctx,
int8_t *output_data);

/**
* @brief Get the required buffer size for s8 transpose conv function
* @brief Get the required buffer size for ctx in s8 transpose conv function
*
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[in] transposed_conv_params Transposed convolution parameters
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @param[in] out_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @return The function returns required buffer size(bytes)
*
*/
int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_dims *input_dims,
int32_t arm_transpose_conv_s8_get_buffer_size(const cmsis_nn_transpose_conv_params *transposed_conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims,
const cmsis_nn_dims *out_dims);

/**
* @brief Get the required buffer size for output_ctx in s8 transpose conv function
*
* @param[in] transposed_conv_params Transposed convolution parameters
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @return The function returns required buffer size(bytes)
*
*/
int32_t arm_transpose_conv_s8_get_reverse_conv_buffer_size(const cmsis_nn_transpose_conv_params *transposed_conv_params,
const cmsis_nn_dims *input_dims,
const cmsis_nn_dims *filter_dims);

/**
* @brief Get size of additional buffer required by arm_transpose_conv_s8() for processors with DSP extension.
* Refer to arm_transpose_conv_s8_get_buffer_size() for function argument details.
Expand Down
49 changes: 47 additions & 2 deletions Include/arm_nnsupportfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 08 November 2024
* $Revision: V.22.6.1
* $Date: 08 Nov 2024
* $Revision: V.22.7.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -72,6 +72,10 @@ extern "C" {
// to not lose precision.
#define MAX_COL_COUNT (512)

// CMSIS-NN has two implementations of the transpose conv operator, selected depending on the number of input
// channels. This is based on heuristics and may be fine-tuned depending on other parameters of the operator.
#define REVERSE_TCOL_EFFICIENT_THRESHOLD (16)

// Threshold for number of output channels that decide whether to convert a depthwise conv to a
// regular conv operation when number of input channels is one.
// Only applicable for processors with MVE extension.
Expand Down Expand Up @@ -1014,6 +1018,47 @@ int16_t *arm_nn_depthwise_conv_nt_t_s16(const int16_t *lhs,
const int64_t *const output_bias,
int16_t *out);

/**
* @brief Row of s8 scalars multiplied with an s8 matrix and accumulated into an s32 rolling scratch buffer.
* Helper function for transposed convolution.
*
* @param[in] lhs Input left-hand side scalars
* @param[in] rhs Input right-hand side matrix
* @param[out] output_start Output buffer start
* @param[in] output_index Output buffer current index
* @param[in] output_max Output buffer size
* @param[in] rhs_rows Number of rows in rhs matrix
* @param[in] rhs_cols Number of columns in rhs matrix
* @param[in] input_channels Number of input channels
* @param[in] output_channels Number of output channels
* @param[in] lhs_offset Offset added to lhs before multiplication
* @param[in] row_offset Address offset between each row of data output
* @param[in] input_x Length of lhs scalar row.
* @param[in] stride_x Address offset between each scalar-matrix multiplication result.
* @param[in] skip_row_top Skip rows on top of the filter, used for padding.
* @param[in] skip_row_bottom Skip rows in the bottom of the filter, used for padding.
*
* @return The function returns ARM_CMSIS_NN_SUCCESS
*
* @note Rolling buffer refers to how the function wraps around the scratch buffer, e.g. it starts writing at
* [output_start + output_index], writes to [output_start + output_max] and then continues at [output_start] again.
*/
arm_cmsis_nn_status arm_nn_transpose_conv_row_s8_s32(const int8_t *lhs,
const int8_t *rhs,
int32_t *output_start,
const int32_t output_index,
const int32_t output_max,
const int32_t rhs_rows,
const int32_t rhs_cols,
const int32_t input_channels,
const int32_t output_channels,
const int32_t lhs_offset,
const int32_t row_offset,
const int32_t input_x,
const int32_t stride_x,
const int32_t skip_row_top,
const int32_t skip_row_bottom);

/**
@brief Read 2 s16 elements and post increment pointer.
@param[in] in_q15 Pointer to pointer that holds address of input.
Expand Down
6 changes: 4 additions & 2 deletions Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_convolve_1_x_n_s8.c
* Description: s8 version of 1xN convolution using symmetric quantization.
*
* $Date: 19 March 2024
* $Revision: V.3.6.0
* $Date: 04 November 2024
* $Revision: V.3.6.1
*
* Target : Arm(R) M-Profile Architecture
*
Expand Down Expand Up @@ -107,6 +107,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
filter_data,
bias_dims,
bias_data,
NULL,
output_dims,
output_data);
}
Expand Down Expand Up @@ -219,6 +220,7 @@ arm_cmsis_nn_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
filter_data,
bias_dims,
bias_data,
NULL,
output_dims,
output_data);

Expand Down
Loading