#include <cstdint>

#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"
#include "caffe2/perfkernels/fused_8bit_rowwise_conversion.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"
1111
1212C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10 (Fused8BitRowwiseQuantizedToFloat);
1313
1414namespace caffe2 {
1515
// Evaluates to true iff the host is little-endian, checked at runtime by
// inspecting the first byte of a 32-bit integer with value 1.
// Implemented as an immediately-invoked lambda so the macro can be used in
// expression context (e.g. inside CAFFE_ENFORCE(...)).
#define IS_LITTLE_ENDIAN                                               \
  [] {                                                                 \
    const std::int32_t kValue = 1;                                     \
    return reinterpret_cast<const std::uint8_t*>(&kValue)[0] == 1;     \
  }()
2121
2222template <
2323 typename T,
2424 void (*convert)(float * dst, const T* src, size_t N),
25+ bool HAS_CONVERT,
2526 class Context >
2627class FloatToFused8BitRowwiseQuantizedOp : public Operator <Context> {
2728 public:
@@ -45,40 +46,38 @@ class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
4546 // bytes of each row for scale (4 bytes) and bias (4 bytes).
4647 // | ... int8 data ... | scale | bias |
4748 // | number_of_columns | 4B | 4B |
48- const std::vector<int64_t > output_dimensions = {input_rows,
49- input_columns + 8 };
49+ const std::vector<std::int64_t > output_dimensions = {
50+ input_rows,
51+ input_columns + static_cast <std::int64_t >(2 * sizeof (float ))};
5052 auto * output = Output (
51- DATA_FUSED_SCALE_BIAS_INT8, output_dimensions, at::dtype<uint8_t >());
53+ DATA_FUSED_SCALE_BIAS_INT8,
54+ output_dimensions,
55+ at::dtype<std::uint8_t >());
5256
5357 const auto * input_data = input.template data <T>();
54- auto * output_data = output->template mutable_data <uint8_t >();
58+ auto * output_data = output->template mutable_data <std:: uint8_t >();
5559 const auto output_columns = output->size (1 );
5660
57- if (!std::is_same<T, float >::value && !std::is_same<T, at::Half>::value) {
58- CAFFE_THROW (" Unsupported data type" );
61+ bool is_float = std::is_same<T, float >::value;
62+ if (!HAS_CONVERT) {
63+ CAFFE_ENFORCE (is_float, " convert can be nullptr only if T is float" );
64+ FloatToFused8BitRowwiseQuantized (
65+ reinterpret_cast <const float *>(input_data),
66+ input_rows,
67+ input_columns,
68+ output_data);
69+ return true ;
5970 }
6071
61- vector<float > tmp;
62- tmp.resize (input_columns, 0.0 );
72+ bool is_half = std::is_same<T, at::Half>::value;
73+ CAFFE_ENFORCE (is_float || is_half);
74+
75+ vector<float > tmp (input_columns);
6376
6477 for (size_t row = 0 ; row < input_rows; ++row) {
6578 convert (tmp.data (), input_data + row * input_columns, input_columns);
66- ConstEigenVectorArrayMap<float > input_row (tmp.data (), input_columns);
67- uint8_t * output_row = output_data + row * output_columns;
68- EigenVectorArrayMap<uint8_t > output_row_values (output_row, input_columns);
69- EigenVectorArrayMap<float > output_row_scale_bias (
70- reinterpret_cast <float *>(output_row + input_columns), 2 );
71-
72- const float minimum_element = input_row.minCoeff ();
73- const float maximum_element = input_row.maxCoeff ();
74- const float range = maximum_element - minimum_element;
75-
76- output_row_scale_bias (0 ) = range / 255 .0f ;
77- output_row_scale_bias (1 ) = minimum_element;
78- const auto inverse_scale = 255 .0f / (range + kEpsilon );
79- output_row_values = ((input_row - minimum_element) * inverse_scale)
80- .round ()
81- .cast <uint8_t >();
79+ FloatToFused8BitRowwiseQuantized (
80+ tmp.data (), 1 , input_columns, output_data + row * output_columns);
8281 }
8382
8483 return true ;
@@ -92,6 +91,7 @@ class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
9291template <
9392 typename T,
9493 void (*convert)(T* dst, const float * src, size_t N),
94+ bool HAS_CONVERT,
9595 class Context >
9696class Fused8BitRowwiseQuantizedToFloatOp : public Operator <Context> {
9797 public:
@@ -109,28 +109,35 @@ class Fused8BitRowwiseQuantizedToFloatOp : public Operator<Context> {
109109
110110 // The last 8 bytes per row are the scale and the bias. The rest of
111111 // input_columns is the number of values in the original row.
112- const std::vector<int64_t > output_dimensions = {input_rows,
113- input_columns - 8 };
112+ const std::vector<std::int64_t > output_dimensions = {
113+ input_rows,
114+ input_columns - static_cast <std::int64_t >(2 * sizeof (float ))};
114115 auto * output = Output (DATA_FLOAT, output_dimensions, at::dtype<T>());
115116 const auto output_columns = output->size (1 );
116117
117- const auto * input_data = input.template data <uint8_t >();
118+ const auto * input_data = input.template data <std:: uint8_t >();
118119 T* output_data = output->template mutable_data <T>();
119120
120- vector<float > tmp;
121- tmp.resize (input_columns, 0.0 );
121+ bool is_float = std::is_same<T, float >::value;
122122
123- for (size_t row = 0 ; row < input_rows; ++row) {
124- const uint8_t * input_row = input_data + row * input_columns;
125- ConstEigenVectorArrayMap<uint8_t > input_row_values (
126- input_row, output_columns);
127- ConstEigenVectorArrayMap<float > input_row_scale_bias (
128- reinterpret_cast <const float *>(input_row + output_columns), 2 );
123+ if (!HAS_CONVERT) {
124+ CAFFE_ENFORCE (is_float, " convert can be nullptr only if T is float" );
125+ Fused8BitRowwiseQuantizedToFloat (
126+ input_data,
127+ input_rows,
128+ input_columns,
129+ reinterpret_cast <float *>(output_data));
130+ return true ;
131+ }
129132
130- EigenVectorArrayMap<float > output_row (tmp.data (), output_columns);
131- output_row = input_row_values.cast <float >() * input_row_scale_bias (0 ) +
132- input_row_scale_bias (1 );
133+ bool is_half = std::is_same<T, at::Half>::value;
134+ CAFFE_ENFORCE (is_float || is_half);
133135
136+ vector<float > tmp (input_columns);
137+
138+ for (size_t row = 0 ; row < input_rows; ++row) {
139+ Fused8BitRowwiseQuantizedToFloat (
140+ input_data + row * input_columns, 1 , input_columns, tmp.data ());
134141 convert (output_data + row * output_columns, tmp.data (), output_columns);
135142 }
136143 return true ;
0 commit comments