ferrite/tensor/device/cpu/kernels/
activation.rs1use crate::*;
2use rayon::prelude::*;
3
4
5impl ActivationOps for CpuStorage {
6 fn binary_step(&self) -> Self {
7 self.apply(|x| if x < 0. { 0. } else { 1. })
8 }
9
10 fn sigmoid(&self) -> Self {
11 self.apply(|x| 1./(1. + f32::exp(-x)))
12 }
13
14 fn tanh(&self) -> Self {
15 self.apply(|x| (f32::exp(x) - f32::exp(-x))/(f32::exp(x) + f32::exp(-x)))
16 }
17
18 fn relu(&self) -> Self {
19 self.apply(|x| f32::max(0., x))
20 }
21
22 fn leaky_relu(&self) -> Self {
23 self.apply(|x| f32::max(0.1*x, x))
24 }
25
26 fn parametric_relu(&self, a: f32) -> Self {
27 self.apply(|x| f32::max(a*x, a))
28 }
29
30 fn elu(&self, alpha: f32) -> Self {
31 self.apply(|x| if x >= 0. {x} else {alpha * (f32::exp(x) - 1.)})
32 }
33
34 fn softmax(&self, dim: usize) -> Self {
35 let outer: usize = self.shape()[..dim].iter().product();
40 let axis_len: usize = self.shape()[dim];
41 let inner: usize = self.shape()[dim + 1..].iter().product();
42 let total_elements = self.shape().iter().product();
43
44 let input: Vec<f32> = {
46 let binding = self.data();
47 let guard = binding.read().unwrap();
48 guard.clone()
49 };
50
51 let mut new_data = vec![0.0; total_elements];
53 let base_offset = self.offset();
54
55 new_data[base_offset..base_offset + outer * (axis_len * inner)]
59 .par_chunks_mut(axis_len * inner)
60 .enumerate()
61 .for_each(|(i, out_slice)| {
62 let in_start = base_offset + i * (axis_len * inner);
64 let in_end = in_start + (axis_len * inner);
65 let in_slice = &input[in_start..in_end];
66
67 for k in 0..inner {
69 let mut max_val = f32::NEG_INFINITY;
71 for j in 0..axis_len {
72 let idx = j * inner + k;
73 let v = in_slice[idx];
74 if v > max_val {
75 max_val = v;
76 }
77 }
78
79 let mut sum_exp = 0.0;
81 let mut exps = vec![0.0; axis_len];
82 for j in 0..axis_len {
83 let idx = j * inner + k;
84 let exp_val = f32::exp(in_slice[idx] - max_val);
85 exps[j] = exp_val;
86 sum_exp += exp_val;
87 }
88
89 for j in 0..axis_len {
91 let idx = j * inner + k;
92 out_slice[idx] = exps[j] / sum_exp;
93 }
94 }
95 });
96
97 CpuStorage::new(new_data, self.shape().clone())
99 }
100
101 fn swish(&self) -> Self {
102 self.apply(|x| x * (1./(1. + f32::exp(-x))))
103 }
104}