use crate::*;
use rayon::prelude::*;
/// CPU implementations of the activation functions for [`CpuStorage`].
///
/// All element-wise activations are expressed as a scalar closure handed to
/// `self.apply`; `softmax` works directly on a flat copy of the data.
impl ActivationOps for CpuStorage {
    /// Step function: `0.0` for negative inputs, `1.0` for everything else.
    fn binary_step(&self) -> Self {
        self.apply(|x| if x < 0. { 0. } else { 1. })
    }

    /// Logistic sigmoid, `1 / (1 + e^(-x))`.
    fn sigmoid(&self) -> Self {
        self.apply(|x| 1. / (1. + f32::exp(-x)))
    }

    /// Hyperbolic tangent.
    ///
    /// Uses the standard-library `f32::tanh` rather than the textbook
    /// `(e^x - e^-x) / (e^x + e^-x)` quotient: for large `|x|` the exponentials
    /// overflow to infinity and the quotient becomes `inf / inf = NaN`, whereas
    /// `f32::tanh` saturates to `±1`.
    fn tanh(&self) -> Self {
        self.apply(f32::tanh)
    }

    /// Rectified linear unit, `max(0, x)`.
    fn relu(&self) -> Self {
        self.apply(|x| f32::max(0., x))
    }

    /// Leaky ReLU computed as `max(0.1 * x, x)`, i.e. negative inputs are
    /// scaled by a fixed slope of `0.1`.
    fn leaky_relu(&self) -> Self {
        const NEGATIVE_SLOPE: f32 = 0.1;
        self.apply(|x| f32::max(NEGATIVE_SLOPE * x, x))
    }

    /// Parametric ReLU: `x` for `x >= 0`, `a * x` otherwise.
    ///
    /// Written as an explicit branch (instead of `max(a * x, x)`) so that the
    /// result is also correct when `a > 1`.
    fn parametric_relu(&self, a: f32) -> Self {
        self.apply(|x| if x >= 0. { x } else { a * x })
    }

    /// Exponential linear unit: `x` for `x >= 0`, `alpha * (e^x - 1)` otherwise.
    fn elu(&self, alpha: f32) -> Self {
        self.apply(|x| if x >= 0. { x } else { alpha * (f32::exp(x) - 1.) })
    }

    /// Softmax along dimension `dim`.
    ///
    /// The data is treated as a dense row-major `(outer, axis_len, inner)`
    /// block that starts at `self.offset()` in the backing buffer. For every
    /// `(outer, inner)` position the values along the axis are shifted by
    /// their maximum before exponentiation and then normalised by the sum of
    /// the exponentials.
    ///
    /// # Panics
    ///
    /// Panics if `dim` is not a valid index into `self.shape()`, if the data
    /// lock is poisoned, or if the backing buffer holds fewer than
    /// `offset + product(shape)` elements.
    fn softmax(&self, dim: usize) -> Self {
        let axis_len: usize = self.shape()[dim];
        let inner: usize = self.shape()[dim + 1..].iter().product();
        let total_elements: usize = self.shape().iter().product();

        // A zero-length dimension leaves nothing to normalise; returning early
        // also avoids handing a chunk size of 0 to `par_chunks_mut`, which
        // panics.
        if total_elements == 0 {
            return CpuStorage::new(Vec::<f32>::new(), self.shape().clone());
        }

        // Number of contiguous elements that belong to one `outer` index.
        let slab_len = axis_len * inner;
        let base_offset = self.offset();

        // Copy only the elements this storage views, and release the lock
        // before entering the parallel section.
        let input: Vec<f32> = {
            let binding = self.data();
            let guard = binding.read().expect("CpuStorage data lock poisoned");
            guard[base_offset..base_offset + total_elements].to_vec()
        };

        // `CpuStorage::new` receives only data and shape, so the result is
        // written densely from index 0 (presumably the new storage has offset
        // 0 -- confirm against `CpuStorage::new`). Only the *input* is read
        // relative to `base_offset`.
        let mut new_data = vec![0.0_f32; total_elements];
        new_data
            .par_chunks_mut(slab_len)
            .zip(input.par_chunks(slab_len))
            .for_each(|(out_slab, in_slab)| {
                for k in 0..inner {
                    // Elements along the softmax axis for this `k` sit at
                    // k, k + inner, k + 2 * inner, ... inside the slab.
                    let max_val = in_slab[k..]
                        .iter()
                        .step_by(inner)
                        .copied()
                        .fold(f32::NEG_INFINITY, f32::max);

                    // Store the shifted exponentials straight into the output
                    // and accumulate their sum; no scratch buffer is needed.
                    let mut sum_exp = 0.0_f32;
                    for (out, &v) in out_slab[k..]
                        .iter_mut()
                        .step_by(inner)
                        .zip(in_slab[k..].iter().step_by(inner))
                    {
                        let exp_val = f32::exp(v - max_val);
                        *out = exp_val;
                        sum_exp += exp_val;
                    }

                    // Normalise in place.
                    for out in out_slab[k..].iter_mut().step_by(inner) {
                        *out /= sum_exp;
                    }
                }
            });

        CpuStorage::new(new_data, self.shape().clone())
    }

    /// Swish activation, `x * sigmoid(x)`.
    fn swish(&self) -> Self {
        self.apply(|x| x * (1. / (1. + f32::exp(-x))))
    }
}