ferrite/tensor/device/cpu/kernels/
activation.rs

1use crate::*;
2use rayon::prelude::*;
3
4
5impl ActivationOps for CpuStorage {
6  fn binary_step(&self) -> Self {
7    self.apply(|x| if x < 0. { 0. } else { 1. })
8  }
9
10  fn sigmoid(&self) -> Self {
11    self.apply(|x| 1./(1. + f32::exp(-x)))
12  }
13
14  fn tanh(&self) -> Self {
15    self.apply(|x| (f32::exp(x) - f32::exp(-x))/(f32::exp(x) + f32::exp(-x)))
16  }
17
18  fn relu(&self) -> Self {
19    self.apply(|x| f32::max(0., x))
20  }
21
22  fn leaky_relu(&self) -> Self {
23    self.apply(|x| f32::max(0.1*x, x))
24  }
25
26  fn parametric_relu(&self, a: f32) -> Self {
27    self.apply(|x| f32::max(a*x, a))
28  }
29
30  fn elu(&self, alpha: f32) -> Self {
31    self.apply(|x| if x >= 0. {x} else {alpha * (f32::exp(x) - 1.)})
32  }
33
34  fn softmax(&self, dim: usize) -> Self {
35    // Compute dimensions:
36    // - outer: product of dimensions before `dim`
37    // - axis_len: size of the softmax dimension (i.e. at `dim`)
38    // - inner: product of dimensions after `dim`
39    let outer: usize = self.shape()[..dim].iter().product();
40    let axis_len: usize = self.shape()[dim];
41    let inner: usize = self.shape()[dim + 1..].iter().product();
42    let total_elements = self.shape().iter().product();
43
44    // Acquire a read lock and clone the input data.
45    let input: Vec<f32> = {
46      let binding = self.data();
47      let guard = binding.read().unwrap();
48      guard.clone()
49    };
50
51    // Allocate an output vector of the same size.
52    let mut new_data = vec![0.0; total_elements];
53    let base_offset = self.offset();
54
55    // We assume the tensor is contiguous, so the region from base_offset to
56    // base_offset + outer*(axis_len*inner) contains the relevant data.
57    // Split this region into `outer` mutable chunks, each of length axis_len*inner.
58    new_data[base_offset..base_offset + outer * (axis_len * inner)]
59      .par_chunks_mut(axis_len * inner)
60      .enumerate()
61      .for_each(|(i, out_slice)| {
62        // Compute the corresponding slice from the input data.
63        let in_start = base_offset + i * (axis_len * inner);
64        let in_end = in_start + (axis_len * inner);
65        let in_slice = &input[in_start..in_end];
66
67        // For each inner index (each column within the slice)
68        for k in 0..inner {
69          // Find the maximum value along the softmax axis for numerical stability.
70          let mut max_val = f32::NEG_INFINITY;
71          for j in 0..axis_len {
72            let idx = j * inner + k;
73            let v = in_slice[idx];
74            if v > max_val {
75              max_val = v;
76            }
77          }
78
79          // Compute the exponentials and their sum.
80          let mut sum_exp = 0.0;
81          let mut exps = vec![0.0; axis_len];
82          for j in 0..axis_len {
83            let idx = j * inner + k;
84            let exp_val = f32::exp(in_slice[idx] - max_val);
85            exps[j] = exp_val;
86            sum_exp += exp_val;
87          }
88
89          // Normalize the exponentials and write the results to out_slice.
90          for j in 0..axis_len {
91            let idx = j * inner + k;
92            out_slice[idx] = exps[j] / sum_exp;
93          }
94        }
95      });
96
97    // Return a new CpuStorage with the same shape as the original.
98    CpuStorage::new(new_data, self.shape().clone())
99  }
100
101  fn swish(&self) -> Self {
102    self.apply(|x| x * (1./(1. + f32::exp(-x))))
103  }
104}