/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_
#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_

#include "tensorflow/lite/kernels/internal/common.h"

namespace tflite {
namespace reference_integer_ops {

| inline void FullyConnected( |
| const FullyConnectedParams& params, const RuntimeShape& input_shape, |
| const int8_t* input_data, const RuntimeShape& filter_shape, |
| const int8_t* filter_data, const RuntimeShape& bias_shape, |
| const int32* bias_data, const RuntimeShape& output_shape, |
| int8_t* output_data) { |
| const int32 input_offset = params.input_offset; |
| const int32 filter_offset = params.weights_offset; |
| const int32 output_offset = params.output_offset; |
| const int32 output_multiplier = params.output_multiplier; |
| const int output_shift = params.output_shift; |
| const int32 output_activation_min = params.quantized_activation_min; |
| const int32 output_activation_max = params.quantized_activation_max; |
| TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); |
| TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); |
| |
| TFLITE_DCHECK_LE(output_activation_min, output_activation_max); |
| const int filter_dim_count = filter_shape.DimensionsCount(); |
| const int batches = output_shape.Dims(0); |
| const int output_depth = output_shape.Dims(1); |
| TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); |
| const int accum_depth = filter_shape.Dims(filter_dim_count - 1); |
| for (int b = 0; b < batches; ++b) { |
| for (int out_c = 0; out_c < output_depth; ++out_c) { |
| int32 acc = 0; |
| for (int d = 0; d < accum_depth; ++d) { |
| int32 input_val = input_data[b * accum_depth + d]; |
| int32 filter_val = filter_data[out_c * accum_depth + d]; |
| acc += (filter_val + filter_offset) * (input_val + input_offset); |
| } |
| if (bias_data) { |
| acc += bias_data[out_c]; |
| } |
| acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); |
| acc += output_offset; |
| acc = std::max(acc, output_activation_min); |
| acc = std::min(acc, output_activation_max); |
| output_data[out_c + output_depth * b] = static_cast<int8_t>(acc); |
| } |
| } |
| } |
| inline void FullyConnected( |
| const FullyConnectedParams& params, const RuntimeShape& input_shape, |
| const int16_t* input_data, const RuntimeShape& filter_shape, |
| const int8_t* filter_data, const RuntimeShape& bias_shape, |
| const int64_t* bias_data, const RuntimeShape& output_shape, |
| int16_t* output_data) { |
| const int32 filter_offset = params.weights_offset; |
| const int32 output_multiplier = params.output_multiplier; |
| const int output_shift = params.output_shift; |
| const int32 output_activation_min = params.quantized_activation_min; |
| const int32 output_activation_max = params.quantized_activation_max; |
| TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); |
| TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 2); |
| |
| TFLITE_DCHECK_LE(output_activation_min, output_activation_max); |
| const int filter_dim_count = filter_shape.DimensionsCount(); |
| const int batches = output_shape.Dims(0); |
| const int output_depth = output_shape.Dims(1); |
| TFLITE_DCHECK_LE(output_depth, filter_shape.Dims(filter_dim_count - 2)); |
| const int accum_depth = filter_shape.Dims(filter_dim_count - 1); |
| for (int b = 0; b < batches; ++b) { |
| for (int out_c = 0; out_c < output_depth; ++out_c) { |
| int64_t acc = 0; |
| for (int d = 0; d < accum_depth; ++d) { |
| int32 input_val = input_data[b * accum_depth + d]; |
| int32 filter_val = filter_data[out_c * accum_depth + d]; |
| acc += (filter_val + filter_offset) * input_val; |
| } |
| if (bias_data) { |
| acc += bias_data[out_c]; |
| } |
| int32_t acc_scaled = |
| MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); |
| acc_scaled = std::max(acc_scaled, output_activation_min); |
| acc_scaled = std::min(acc_scaled, output_activation_max); |
| output_data[out_c + output_depth * b] = static_cast<int16_t>(acc_scaled); |
| } |
| } |
| } |

}  // namespace reference_integer_ops
}  // namespace tflite

#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_INTEGER_OPS_FULLY_CONNECTED_H_