mod advanced;
#[cfg(test)]
mod tests;
use crate::backends::scalar::ScalarBackend;
use crate::backends::VectorBackend;
use crate::vector::Vector;
use crate::{Backend, Result, TruenoError};
/// Routes a unary, element-wise kernel to the backend picked by a `Backend` value.
///
/// Arguments, in order:
/// 1. an expression evaluating to a `Backend` variant,
/// 2. the kernel's method name (an identifier such as `relu`),
/// 3. the input expression,
/// 4. the output expression.
///
/// The last two are forwarded untouched as `<SelectedBackend>::<kernel>(input, output)`.
///
/// Mapping performed by the match below:
/// - `Scalar`, `GPU` and `Auto` always use `ScalarBackend`.
/// - On `x86_64`, `SSE2`/`AVX` use `Sse2Backend` and `AVX2`/`AVX512` use `Avx2Backend`;
///   on every other architecture those four variants use `ScalarBackend`.
/// - `NEON` uses `NeonBackend` on `aarch64`/`arm`, otherwise `ScalarBackend`.
/// - `WasmSIMD` uses `WasmBackend` on `wasm32`, otherwise `ScalarBackend`.
macro_rules! dispatch_unary_op {
    ($selected:expr, $kernel:ident, $src:expr, $dst:expr) => {{
        // Architecture-specific backends are only imported where they are compiled.
        #[cfg(target_arch = "x86_64")]
        use crate::backends::{avx2::Avx2Backend, sse2::Sse2Backend};
        #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
        use crate::backends::neon::NeonBackend;
        #[cfg(target_arch = "wasm32")]
        use crate::backends::wasm::WasmBackend;

        // NOTE(review): every kernel call sits inside this `unsafe` block, so the backend
        // methods are presumably `unsafe fn`s whose contract (CPU feature present,
        // compatible slice lengths, ...) must be upheld by the caller - confirm against
        // the `VectorBackend` trait and record it as a `SAFETY:` comment.
        unsafe {
            match $selected {
                // Variants with no dedicated CPU kernel in this macro.
                Backend::Scalar | Backend::GPU | Backend::Auto => {
                    ScalarBackend::$kernel($src, $dst)
                }

                // x86 family.
                #[cfg(target_arch = "x86_64")]
                Backend::SSE2 | Backend::AVX => Sse2Backend::$kernel($src, $dst),
                #[cfg(target_arch = "x86_64")]
                Backend::AVX2 | Backend::AVX512 => Avx2Backend::$kernel($src, $dst),
                #[cfg(not(target_arch = "x86_64"))]
                Backend::SSE2 | Backend::AVX | Backend::AVX2 | Backend::AVX512 => {
                    ScalarBackend::$kernel($src, $dst)
                }

                // ARM NEON.
                #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
                Backend::NEON => NeonBackend::$kernel($src, $dst),
                #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
                Backend::NEON => ScalarBackend::$kernel($src, $dst),

                // WebAssembly SIMD.
                #[cfg(target_arch = "wasm32")]
                Backend::WasmSIMD => WasmBackend::$kernel($src, $dst),
                #[cfg(not(target_arch = "wasm32"))]
                Backend::WasmSIMD => ScalarBackend::$kernel($src, $dst),
            }
        }
    }};
}
pub(crate) use dispatch_unary_op;
impl Vector<f32> {
    /// Computes the softmax of the vector: `exp(x_i - max) / sum_j exp(x_j - max)`.
    ///
    /// The maximum element is subtracted before exponentiating, and the denominator
    /// is clamped to at least `f32::EPSILON` before dividing.
    ///
    /// # Errors
    ///
    /// - [`TruenoError::EmptyVector`] if the vector has no elements.
    /// - Any error returned by `self.max()`.
    /// - With the `gpu` feature (non-wasm targets): [`TruenoError::InvalidInput`] if a
    ///   GPU is reported available but `GpuDevice::new()` fails.
    pub fn softmax(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            use crate::backends::gpu::GpuDevice;

            // `usize::MAX` keeps the GPU branch out of reach for any realistic length.
            const GPU_THRESHOLD: usize = usize::MAX;

            if self.data.len() >= GPU_THRESHOLD && GpuDevice::is_available() {
                let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                let mut result = vec![0.0; self.data.len()];
                if gpu.softmax(&self.data, &mut result).is_ok() {
                    return Ok(Vector::from_vec(result));
                }
                // A failed GPU kernel is not an error: fall through to the CPU path.
            }
        }

        let max_val = self.max()?;
        let mut exp_vals: Vec<f32> = self.data.iter().map(|&x| (x - max_val).exp()).collect();
        let safe_sum = exp_vals.iter().sum::<f32>().max(f32::EPSILON);
        // Normalise in place so the exponentials buffer doubles as the output buffer.
        for e in &mut exp_vals {
            *e /= safe_sum;
        }
        Ok(Vector::from_vec(exp_vals))
    }

    /// Computes the log-softmax of the vector: `x_i - max - ln(sum_j exp(x_j - max))`.
    ///
    /// The sum of exponentials is clamped to at least `f32::EPSILON` before taking
    /// the logarithm.
    ///
    /// # Errors
    ///
    /// - [`TruenoError::EmptyVector`] if the vector has no elements.
    /// - Any error returned by `self.max()`.
    /// - With the `gpu` feature (non-wasm targets): [`TruenoError::InvalidInput`] if a
    ///   GPU is reported available but `GpuDevice::new()` fails.
    pub fn log_softmax(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            use crate::backends::gpu::GpuDevice;

            // `usize::MAX` keeps the GPU branch out of reach for any realistic length.
            const GPU_THRESHOLD: usize = usize::MAX;

            if self.data.len() >= GPU_THRESHOLD && GpuDevice::is_available() {
                let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                let mut result = vec![0.0; self.data.len()];
                if gpu.log_softmax(&self.data, &mut result).is_ok() {
                    return Ok(Vector::from_vec(result));
                }
                // A failed GPU kernel is not an error: fall through to the CPU path.
            }
        }

        let max_val = self.max()?;
        // Only the sum of the exponentials is needed, so accumulate it straight from
        // the iterator instead of materialising a temporary vector.
        let sum_exp: f32 = self.data.iter().map(|&x| (x - max_val).exp()).sum();
        let log_sum_exp = sum_exp.max(f32::EPSILON).ln();
        let data: Vec<f32> = self.data.iter().map(|&x| x - max_val - log_sum_exp).collect();
        Ok(Vector::from_vec(data))
    }

    /// Applies the `relu` kernel (rectified linear unit) of this vector's backend to
    /// every element and returns the result as a new vector.
    ///
    /// With the `parallel` feature, inputs of at least 500 000 elements are processed
    /// in 65 536-element chunks on the rayon thread pool; each chunk is dispatched to
    /// the same backend kernel.
    ///
    /// # Errors
    ///
    /// - [`TruenoError::EmptyVector`] if the vector has no elements.
    /// - With the `gpu` feature (non-wasm targets): [`TruenoError::InvalidInput`] if a
    ///   GPU is reported available but `GpuDevice::new()` fails.
    pub fn relu(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            use crate::backends::gpu::GpuDevice;

            // `usize::MAX` keeps the GPU branch out of reach for any realistic length.
            const GPU_THRESHOLD: usize = usize::MAX;

            if self.data.len() >= GPU_THRESHOLD && GpuDevice::is_available() {
                let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                let mut result = vec![0.0; self.data.len()];
                if gpu.relu(&self.data, &mut result).is_ok() {
                    return Ok(Vector::from_vec(result));
                }
                // A failed GPU kernel is not an error: fall through to the CPU path.
            }
        }

        let mut result = vec![0.0; self.len()];

        #[cfg(feature = "parallel")]
        {
            const PARALLEL_THRESHOLD: usize = 500_000;
            const CHUNK_SIZE: usize = 65536;

            if self.len() >= PARALLEL_THRESHOLD {
                use rayon::prelude::*;

                // Input and output are chunked identically, so each task writes the
                // output slice that lines up with its input slice.
                self.data
                    .par_chunks(CHUNK_SIZE)
                    .zip(result.par_chunks_mut(CHUNK_SIZE))
                    .for_each(|(chunk_in, chunk_out)| {
                        dispatch_unary_op!(self.backend, relu, chunk_in, chunk_out);
                    });
                return Ok(Vector::from_vec(result));
            }
        }

        dispatch_unary_op!(self.backend, relu, &self.data, &mut result);
        Ok(Vector::from_vec(result))
    }

    /// Applies the `sigmoid` kernel of this vector's backend to every element and
    /// returns the result as a new vector.
    ///
    /// # Errors
    ///
    /// - [`TruenoError::EmptyVector`] if the vector has no elements.
    /// - With the `gpu` feature (non-wasm targets): [`TruenoError::InvalidInput`] if a
    ///   GPU is reported available but `GpuDevice::new()` fails.
    pub fn sigmoid(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            use crate::backends::gpu::GpuDevice;

            // `usize::MAX` keeps the GPU branch out of reach for any realistic length.
            const GPU_THRESHOLD: usize = usize::MAX;

            if self.data.len() >= GPU_THRESHOLD && GpuDevice::is_available() {
                let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                let mut result = vec![0.0; self.data.len()];
                if gpu.sigmoid(&self.data, &mut result).is_ok() {
                    return Ok(Vector::from_vec(result));
                }
                // A failed GPU kernel is not an error: fall through to the CPU path.
            }
        }

        let mut result = vec![0.0; self.len()];
        dispatch_unary_op!(self.backend, sigmoid, &self.data, &mut result);
        Ok(Vector::from_vec(result))
    }
}