| /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| #include "tensorflow/lite/kernels/kernel_util.h" |
| |
| #include <stdint.h> |
| #include <stdlib.h> |
| |
| #include <algorithm> |
| #include <limits> |
| #include <memory> |
| |
| #include "tensorflow/lite/c/builtin_op_data.h" |
| #include "tensorflow/lite/c/common.h" |
| #include "tensorflow/lite/kernels/internal/cppmath.h" |
| #include "tensorflow/lite/kernels/internal/quantization_util.h" |
| |
| namespace tflite { |
| |
| // Per-axis |
| TfLiteStatus PopulateConvolutionQuantizationParams( |
| TfLiteContext* context, const TfLiteTensor* input, |
| const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output, |
| const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift, |
| int32_t* output_activation_min, int32_t* output_activation_max, |
| int32_t* per_channel_multiplier, int* per_channel_shift) { |
| const auto* affine_quantization = |
| reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params); |
| return PopulateConvolutionQuantizationParams( |
| context, input, filter, bias, output, activation, multiplier, shift, |
| output_activation_min, output_activation_max, per_channel_multiplier, |
| per_channel_shift, affine_quantization->scale->size); |
| } |
| |
| // Per-axis & per-tensor |
TfLiteStatus PopulateConvolutionQuantizationParams(
    TfLiteContext* context, const TfLiteTensor* input,
    const TfLiteTensor* filter, const TfLiteTensor* bias, TfLiteTensor* output,
    const TfLiteFusedActivation& activation, int32_t* multiplier, int* shift,
    int32_t* output_activation_min, int32_t* output_activation_max,
    int32_t* per_channel_multiplier, int* per_channel_shift, int num_channels) {
  // Both the input and the filter must carry affine quantization metadata;
  // everything below reads scales out of it.
  TF_LITE_ENSURE_EQ(context, input->quantization.type,
                    kTfLiteAffineQuantization);
  TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                    kTfLiteAffineQuantization);
  // TODO(jianlijianli): Enable bias type check and bias scale == input scale
  // * filter scale for each channel in affine quantization once bias
  // quantization is properly populated.
  // TF_LITE_ENSURE_EQ(context, bias->quantization.type,
  // kTfLiteAffineQuantization);

  // Check data type.
  const auto* affine_quantization =
      reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
  TF_LITE_ENSURE(context, affine_quantization);
  TF_LITE_ENSURE(context, affine_quantization->scale);
  // More than one filter scale means per-channel quantization; a single scale
  // is per-tensor and is broadcast across all channels in the loop below.
  const bool is_per_channel = affine_quantization->scale->size > 1;
  if (is_per_channel) {
    // Currently only Int8/Int16 is supported for per channel quantization.
    TF_LITE_ENSURE(context,
                   input->type == kTfLiteInt8 || input->type == kTfLiteInt16);
    TF_LITE_ENSURE_EQ(context, filter->type, kTfLiteInt8);
    TF_LITE_ENSURE_EQ(context, affine_quantization->scale->size, num_channels);
    // The number of scales must match the extent of the filter's quantized
    // dimension (typically channels_out).
    TF_LITE_ENSURE_EQ(
        context, num_channels,
        filter->dims->data[affine_quantization->quantized_dimension]);
  }

  // Populate multiplier and shift using affine quantization.
  const float input_scale = input->params.scale;
  const float output_scale = output->params.scale;
  const float* filter_scales = affine_quantization->scale->data;
  for (int i = 0; i < num_channels; ++i) {
    // If per-tensor quantization parameter is specified, broadcast it along the
    // quantization dimension (channels_out).
    const float scale = is_per_channel ? filter_scales[i] : filter_scales[0];
    const double filter_scale = static_cast<double>(scale);
    // effective_output_scale = input_scale * filter_scale / output_scale is
    // the real-valued multiplier the integer kernel must approximate.
    const double effective_output_scale = static_cast<double>(input_scale) *
                                          filter_scale /
                                          static_cast<double>(output_scale);
    int32_t significand;
    int channel_shift;
    // Decompose into a fixed-point significand and a power-of-two exponent.
    QuantizeMultiplier(effective_output_scale, &significand, &channel_shift);
    per_channel_multiplier[i] = significand;
    per_channel_shift[i] = channel_shift;
  }

  // Populate scalar quantization parameters.
  // This check on legacy quantization parameters is kept only for backward
  // compatibility.
  if (input->type == kTfLiteUInt8) {
    // Check bias scale == input scale * filter scale.
    double real_multiplier = 0.0;
    TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
        context, input, filter, bias, output, &real_multiplier));
    int exponent;

    // Populate quantization parameters with multiplier and shift.
    QuantizeMultiplier(real_multiplier, multiplier, &exponent);
    // Legacy UInt8 kernels expect a right-shift amount, hence the negation.
    *shift = -exponent;
  }
  // Quantized activation clamp bounds are only meaningful for integer types.
  if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8 ||
      input->type == kTfLiteInt16) {
    TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
        context, activation, output, output_activation_min,
        output_activation_max));
  }
  return kTfLiteOk;
}
| |
| TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, |
| const TfLiteTensor* input, |
| const TfLiteTensor* filter, |
| const TfLiteTensor* bias, |
| TfLiteTensor* output, |
| double* multiplier) { |
| const double input_product_scale = static_cast<double>(input->params.scale) * |
| static_cast<double>(filter->params.scale); |
| // TODO(ahentz): The following conditions must be guaranteed by the training |
| // pipeline. |
| if (bias) { |
| const double bias_scale = static_cast<double>(bias->params.scale); |
| // Here we're making sure the input_product_scale & bias_scale are about the |
| // same. Since we have: |
| // (output - output_zp) * output_scale = |
| // input_product_scale * input_product + bias * bias_scale ---- (0) |
| // |
| // (0) equals: |
| // (input_product + bias) * input_product_scale ----- (1) |
| // + |
| // bias * (bias_scale - input_product_scale) ------ (2) |
| // |
| // For the real kernel computation, we're doing (1), so we really need to |
| // make sure (2) has minimum impact on the output, so: |
| // bias * (bias_scale - input_product_scale) / output_scale should be |
| // a small number for an integer. |
| // Since normally bias should be within a small range. |
| // We should expect (bias_scale - input_product_scale) / output_scale to |
| // be a small number like 0.02. |
| const double scale_diff = std::abs(input_product_scale - bias_scale); |
| const double output_scale = static_cast<double>(output->params.scale); |
| |
| TF_LITE_ENSURE(context, scale_diff / output_scale <= 0.02); |
| } |
| return GetQuantizedConvolutionMultipler(context, input, filter, output, |
| multiplier); |
| } |
| |
| TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context, |
| const TfLiteTensor* input, |
| const TfLiteTensor* filter, |
| TfLiteTensor* output, |
| double* multiplier) { |
| const double input_product_scale = |
| static_cast<double>(input->params.scale * filter->params.scale); |
| TF_LITE_ENSURE(context, input_product_scale >= 0); |
| *multiplier = input_product_scale / static_cast<double>(output->params.scale); |
| |
| return kTfLiteOk; |
| } |
| |
| namespace { |
| void CalculateActivationRangeQuantizedImpl(TfLiteFusedActivation activation, |
| int32_t qmin, int32_t qmax, |
| TfLiteTensor* output, |
| int32_t* act_min, int32_t* act_max) { |
| const auto scale = output->params.scale; |
| const auto zero_point = output->params.zero_point; |
| |
| auto quantize = [scale, zero_point](float f) { |
| return zero_point + static_cast<int32_t>(TfLiteRound(f / scale)); |
| }; |
| |
| if (activation == kTfLiteActRelu) { |
| *act_min = std::max(qmin, quantize(0.0)); |
| *act_max = qmax; |
| } else if (activation == kTfLiteActRelu6) { |
| *act_min = std::max(qmin, quantize(0.0)); |
| *act_max = std::min(qmax, quantize(6.0)); |
| } else if (activation == kTfLiteActReluN1To1) { |
| *act_min = std::max(qmin, quantize(-1.0)); |
| *act_max = std::min(qmax, quantize(1.0)); |
| } else { |
| *act_min = qmin; |
| *act_max = qmax; |
| } |
| } |
| } // namespace |
| |
| TfLiteStatus CalculateActivationRangeQuantized(TfLiteContext* context, |
| TfLiteFusedActivation activation, |
| TfLiteTensor* output, |
| int32_t* act_min, |
| int32_t* act_max) { |
| int32_t qmin = 0; |
| int32_t qmax = 0; |
| if (output->type == kTfLiteUInt8) { |
| qmin = std::numeric_limits<uint8_t>::min(); |
| qmax = std::numeric_limits<uint8_t>::max(); |
| } else if (output->type == kTfLiteInt8) { |
| qmin = std::numeric_limits<int8_t>::min(); |
| qmax = std::numeric_limits<int8_t>::max(); |
| } else if (output->type == kTfLiteInt16) { |
| qmin = std::numeric_limits<int16_t>::min(); |
| qmax = std::numeric_limits<int16_t>::max(); |
| } else { |
| TF_LITE_ENSURE(context, false); |
| } |
| |
| CalculateActivationRangeQuantizedImpl(activation, qmin, qmax, output, act_min, |
| act_max); |
| return kTfLiteOk; |
| } |
| |
| bool HaveSameShapes(const TfLiteTensor* input1, const TfLiteTensor* input2) { |
| return TfLiteIntArrayEqual(input1->dims, input2->dims); |
| } |
| |
| // TODO(petewarden): Having macros around this is ugly, look at other strategies |
| // before replicating this approach elsewhere. |
| #ifndef TF_LITE_STATIC_MEMORY |
| TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, |
| const TfLiteTensor* input1, |
| const TfLiteTensor* input2, |
| TfLiteIntArray** output_shape) { |
| int dims1 = NumDimensions(input1); |
| int dims2 = NumDimensions(input2); |
| int out_dims = std::max(dims1, dims2); |
| if (NumElements(input1) == 0) { |
| *output_shape = TfLiteIntArrayCopy(input1->dims); |
| return kTfLiteOk; |
| } |
| std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape( |
| TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); |
| for (int i = 0; i < out_dims; ++i) { |
| int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); |
| int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); |
| TF_LITE_ENSURE(context, d1 == d2 || d1 == 1 || d2 == 1); |
| shape->data[out_dims - i - 1] = std::max(d1, d2); |
| } |
| *output_shape = shape.release(); |
| return kTfLiteOk; |
| } |
| |
| TfLiteStatus CalculateShapeForBroadcast(TfLiteContext* context, |
| const TfLiteTensor* input1, |
| const TfLiteTensor* input2, |
| const TfLiteTensor* input3, |
| TfLiteIntArray** output_shape) { |
| int dims1 = NumDimensions(input1); |
| int dims2 = NumDimensions(input2); |
| int dims3 = NumDimensions(input3); |
| int out_dims = std::max(std::max(dims1, dims2), dims3); |
| std::unique_ptr<TfLiteIntArray, void (*)(TfLiteIntArray*)> shape( |
| TfLiteIntArrayCreate(out_dims), TfLiteIntArrayFree); |
| for (int i = 0; i < out_dims; ++i) { |
| int d1 = i >= dims1 ? 1 : SizeOfDimension(input1, dims1 - i - 1); |
| int d2 = i >= dims2 ? 1 : SizeOfDimension(input2, dims2 - i - 1); |
| int d3 = i >= dims3 ? 1 : SizeOfDimension(input3, dims3 - i - 1); |
| int max_value = std::max(std::max(d1, d2), d3); |
| TF_LITE_ENSURE(context, d1 == 1 || d1 == max_value); |
| TF_LITE_ENSURE(context, d2 == 1 || d2 == max_value); |
| TF_LITE_ENSURE(context, d3 == 1 || d3 == max_value); |
| shape->data[out_dims - i - 1] = max_value; |
| } |
| *output_shape = shape.release(); |
| return kTfLiteOk; |
| } |
| #endif // TF_LITE_STATIC_MEMORY |
| |
| } // namespace tflite |