-
Notifications
You must be signed in to change notification settings - Fork 247
Expand file tree
/
Copy pathActivationFncs_kernels.cpp
More file actions
84 lines (73 loc) · 3.11 KB
/
ActivationFncs_kernels.cpp
File metadata and controls
84 lines (73 loc) · 3.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include "ActivationFncs_kernels.hpp"
namespace uTensor {
void sq_softmax_k(Tensor& out, const Tensor& in, int8_t beta) {
  // Quantized (int8) softmax over the innermost dimension:
  //   out[i, j] = exp(beta * x[i, j]) / sum_j exp(beta * x[i, j])
  // computed in float after dequantizing, then requantized into `out`.
  // NOTE(review): assumes per-tensor quantization (channel 0) — confirm.
  const float beta_f = static_cast<float>(beta);
  const TensorShape& inShape = in->get_shape();
  int outer_dim = inShape.num_dims() - 1;
  int depth = inShape[outer_dim];
  // Number of independent softmax rows = product of all dims except the last.
  int out_side_numelems = 1;
  for (int i = 0; i < inShape.num_dims(); i++) {
    out_side_numelems *= (i == outer_dim) ? 1 : inShape[i];
  }
  // Quantization parameters are constant across the tensor; hoist the
  // lookups out of the per-element loops.
  const float in_scale = in->get_quantization_params().get_scale_for_channel(0);
  const int32_t in_zp = in->get_quantization_params().get_zeroP_for_channel(0);
  const float oscale = out->get_quantization_params().get_scale_for_channel(0);
  const int32_t ozp = out->get_quantization_params().get_zeroP_for_channel(0);
  for (int i = 0; i < out_side_numelems; i++) {
    // exp(x[i])/sum(exp(x[i])) == exp(x[i]+C)/sum(exp(x[i]+C)).
    // Subtract the row max in the DEQUANTIZED domain so the largest term is
    // exactly exp(0) == 1: exp() never overflows and mSum >= 1, so the
    // division below cannot produce 0/0 -> NaN. (Subtracting the raw int8
    // max here would leave exp() unbounded relative to the float inputs.)
    float max_f = std::numeric_limits<float>::lowest();
    for (int j = 0; j < depth; j++) {
      const int32_t in32 = static_cast<int8_t>(in(i, j));
      const float in_f = (in32 - in_zp) * in_scale;
      max_f = std::max(max_f, in_f);
    }
    float mSum = 0;
    for (int j = 0; j < depth; j++) {
      const int32_t in32 = static_cast<int8_t>(in(i, j));
      const float in_f = (in32 - in_zp) * in_scale;
      mSum += exp((in_f - max_f) * beta_f);
    }
    // TODO FIXME SLOW but mem efficient: exp() is recomputed per element
    // instead of caching a row of floats.
    for (int j = 0; j < depth; j++) {
      const int32_t in32 = static_cast<int8_t>(in(i, j));
      const float in_f = (in32 - in_zp) * in_scale;
      const float out_val = exp((in_f - max_f) * beta_f) / mSum;
      // Requantize and saturate to the int8 range [-128, 127].
      const int32_t otmp = static_cast<int32_t>(out_val / oscale) + ozp;
      const int8_t out8 =
          (otmp < -128) ? -128 : (otmp > 127) ? 127 : static_cast<int8_t>(otmp);
      out(i, j) = out8;
    }
  }
}
template <>
// ReLU for float tensors: out[k] = max(in[k], 0), applied element-wise over
// the flattened (linear) view of the tensor. `out` must be at least as large
// as `in`.
void relu_k_impl<float>::operator()(Tensor& out, const Tensor& in) const {
  const uint32_t numel = in->get_shape().get_linear_size();
  for (uint32_t idx = 0; idx < numel; idx++) {
    const float val = in(idx);
    // Negative inputs clamp to zero; everything else passes through.
    out(idx) = (val < 0.0f) ? 0.0f : val;
  }
}
template <>
// Quantized (int8) logistic sigmoid: dequantize each element, apply
// 1 / (1 + exp(-x)) in float, then requantize with the output tensor's
// quantization parameters.
// NOTE(review): assumes per-tensor quantization (channel 0) — confirm.
void sigmoid_k_impl<int8_t>::operator()(Tensor& out, const Tensor& in) const {
  const float one = 1;
  uint32_t t_size = in->get_shape().get_linear_size();
  // Quantization parameters are constant across the tensor; hoist the
  // lookups out of the per-element loop (the original re-fetched all four
  // on every iteration).
  const float in_scale = in->get_quantization_params().get_scale_for_channel(0);
  const int32_t in_zp = in->get_quantization_params().get_zeroP_for_channel(0);
  const float oscale = out->get_quantization_params().get_scale_for_channel(0);
  const int32_t ozp = out->get_quantization_params().get_zeroP_for_channel(0);
  for (uint32_t i = 0; i < t_size; i++) {
    const int32_t in32 = static_cast<int8_t>(in(i));
    const float in_f = (in32 - in_zp) * in_scale;
    const float out_val = one / (one + exp(-in_f));
    // Requantize and saturate to the int8 range [-128, 127].
    const int32_t otmp = static_cast<int32_t>(out_val / oscale) + ozp;
    const int8_t out8 =
        (otmp < -128) ? -128 : (otmp > 127) ? 127 : static_cast<int8_t>(otmp);
    out(i) = out8;
  }
}
}