use num_traits::Zero;
use tract_data::internal::f16;
use crate::frame::reduce::MapReduceKer;
/// Portable `f32` kernel: exponentiates a slice in place and returns the sum
/// of the exponentials.
#[derive(Clone, Debug)]
pub struct SSoftMaxL2;

impl MapReduceKer<f32, f32> for SSoftMaxL2 {
    /// Identifier reported for this kernel implementation.
    fn name() -> &'static str {
        "generic"
    }

    /// Alignment requirement, expressed as a number of `f32` items.
    fn alignment_items() -> usize {
        4
    }

    /// Granularity of the kernel: `run` expects slice lengths that are a
    /// multiple of this value.
    fn nr() -> usize {
        4
    }

    /// Neutral value for the map stage (the lowest finite `f32`).
    fn map_neutral() -> f32 {
        f32::MIN
    }

    /// Neutral value for the reduction (additive identity).
    fn reduce_neutral() -> f32 {
        0.
    }

    /// Combines two partial reductions by adding them.
    fn reduce_two(a: f32, b: f32) -> f32 {
        a + b
    }

    /// Replaces every element `v` of `x` with the approximate exponential of
    /// `v - max` and returns the sum of the written values.
    ///
    /// In debug builds, checks that `x.len()` is a multiple of `nr()` and that
    /// the slice start honours `alignment_bytes()`.
    fn run(x: &mut [f32], max: f32) -> f32 {
        debug_assert!(x.len() % Self::nr() == 0);
        debug_assert!(x.as_ptr() as usize % Self::alignment_bytes() == 0);
        // Left-to-right accumulation starting from zero, one element at a time.
        x.iter_mut().fold(0., |acc, item| {
            let e = fast_compact_exp_f32(*item - max);
            *item = e;
            acc + e
        })
    }
}
/// Portable `f16` kernel: exponentiates a slice in place and returns the sum
/// of the exponentials.
#[derive(Clone, Debug)]
pub struct HSoftMaxL2;

impl MapReduceKer<f16, f16> for HSoftMaxL2 {
    /// Identifier reported for this kernel implementation.
    fn name() -> &'static str {
        "generic"
    }

    /// Alignment requirement, expressed as a number of `f16` items.
    fn alignment_items() -> usize {
        8
    }

    /// Granularity of the kernel: `run` expects slice lengths that are a
    /// multiple of this value.
    fn nr() -> usize {
        8
    }

    /// Neutral value for the map stage (the lowest finite `f16`).
    fn map_neutral() -> f16 {
        f16::MIN
    }

    /// Neutral value for the reduction (additive identity).
    fn reduce_neutral() -> f16 {
        f16::zero()
    }

    /// Combines two partial reductions by adding them.
    fn reduce_two(a: f16, b: f16) -> f16 {
        a + b
    }

    /// Replaces every element `v` of `x` with the approximate exponential of
    /// `v - max` and returns the sum of the written values.
    ///
    /// The subtraction and the running sum are carried out in `f16`; only the
    /// exponential itself goes through `f32` via `fast_compact_exp_f32`.
    ///
    /// In debug builds, checks that `x.len()` is a multiple of `nr()` and that
    /// the slice start honours `alignment_bytes()`.
    fn run(x: &mut [f16], max: f16) -> f16 {
        debug_assert!(x.len() % Self::nr() == 0);
        debug_assert!(x.as_ptr() as usize % Self::alignment_bytes() == 0);
        // Left-to-right accumulation starting from zero, one element at a time.
        x.iter_mut().fold(f16::zero(), |acc, item| {
            let shifted = *item - max;
            let e = f16::from_f32(fast_compact_exp_f32(shifted.to_f32()));
            *item = e;
            acc + e
        })
    }
}
/// Fast, low-precision approximation of `exp(v)` for `f32`.
///
/// The result is built directly as an IEEE-754 bit pattern: `v` is scaled so
/// that one unit of `v / ln(2)` equals one step of the exponent field
/// (`2^23`), shifted by the bit pattern of `1.0`, and the resulting integer is
/// reinterpreted as a float.
///
/// Edge cases:
/// * very negative inputs and NaN produce `0.0`, because the float-to-`u32`
///   `as` cast saturates negatives to 0 and maps NaN to 0;
/// * inputs too large for the result to fit in an `f32` (above roughly 88.7)
///   produce `+inf`. Without the clamp below, the integer would spill into the
///   sign bit or the NaN range and yield negative or NaN results.
pub fn fast_compact_exp_f32(v: f32) -> f32 {
    const MLN2: f32 = 0.6931471805f32; // ln(2)
    const A: f32 = 8388608.0f32; // 2^23: one step of the f32 exponent field
    const B: f32 = 1065353216.0f32; // 127 << 23: bit pattern of 1.0f32
    const C: f32 = 60801.0f32; // correction term, presumably tuned to reduce the approximation error
    const SLOPE: f32 = A / MLN2;
    const OFFSET: f32 = B - C;
    // Bit pattern of +inf; anything above it is NaN or has the sign bit set.
    const INF_BITS: u32 = 0x7F80_0000;

    // Saturating cast: negative values and NaN become 0, huge values u32::MAX.
    let bits = ((SLOPE * v) + OFFSET) as u32;
    f32::from_bits(bits.min(INF_BITS))
}
// Test-only module: instantiates the `softmax_l2_frame_tests!` macro for the
// `f32` kernel `SSoftMaxL2`.
// NOTE(review): the meaning of the leading `true` argument is defined by the
// macro, which is not visible here — confirm against its definition.
#[cfg(test)]
#[macro_use]
pub mod s {
softmax_l2_frame_tests!(true, f32, crate::generic::softmax::SSoftMaxL2);
}
// Test-only module: instantiates the `softmax_l2_frame_tests!` macro for the
// `f16` kernel `HSoftMaxL2`.
// NOTE(review): the meaning of the leading `true` argument is defined by the
// macro, which is not visible here — confirm against its definition.
#[cfg(test)]
#[macro_use]
pub mod h {
// Brings the parent module's names (including `f16`) into scope for the macro.
use super::*;
softmax_l2_frame_tests!(true, f16, crate::generic::softmax::HSoftMaxL2);
}