use crate::error::ModelError;
use crate::neural_network::Tensor;
use crate::neural_network::layer::TrainingParameters;
use crate::neural_network::layer::activation_layer::format_output_shape;
use crate::neural_network::layer::layer_weight::LayerWeight;
use crate::neural_network::neural_network_trait::{ActivationLayer, Layer};
use ndarray::Zip;

/// Gradients are clamped to this magnitude during backpropagation.
const GRAD_CLIP_VALUE: f32 = 1e6;
/// Inputs are clamped to this range before the sigmoid is evaluated so that
/// extreme values saturate toward 0 or 1 instead of destabilizing the math.
const INPUT_CLIP_MIN: f32 = -500.0;
const INPUT_CLIP_MAX: f32 = 500.0;
/// Tensors with at least this many elements are processed in parallel.
const SIGMOID_PARALLEL_THRESHOLD: usize = 1000;
/// Sigmoid activation layer: `sigmoid(x) = 1 / (1 + exp(-x))`.
///
/// The forward output is cached so the backward pass can evaluate the
/// derivative as `out * (1 - out)` without recomputing the activation.
pub struct Sigmoid {
    /// Output of the most recent forward pass, consumed by `backward`.
    output_cache: Option<Tensor>,
}

impl Sigmoid {
    /// Creates a new `Sigmoid` layer with an empty output cache.
    pub fn new() -> Self {
        Sigmoid { output_cache: None }
    }
}
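
// A small addition not present in the original: a `Default` impl delegating
// to `new`, which keeps the type consistent with Clippy's
// `new_without_default` lint. Assumes no conflicting impl elsewhere in the crate.
impl Default for Sigmoid {
    fn default() -> Self {
        Self::new()
    }
}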

impl Layer for Sigmoid {
    fn forward(&mut self, input: &Tensor) -> Result<Tensor, ModelError> {
        // Reject inputs the activation cannot produce meaningful output for.
        if input.is_empty() {
            return Err(ModelError::InputValidationError(
                "Input tensor is empty".to_string(),
            ));
        }
        if input.iter().any(|&x| x.is_nan() || x.is_infinite()) {
            return Err(ModelError::InputValidationError(
                "Input tensor contains NaN or infinite values".to_string(),
            ));
        }

        let mut output = input.to_owned();
        // Clamp before exponentiating so extreme inputs saturate cleanly.
        let sigmoid_fn = |x: f32| {
            let clipped_x = x.clamp(INPUT_CLIP_MIN, INPUT_CLIP_MAX);
            1.0 / (1.0 + (-clipped_x).exp())
        };
        // Apply element-wise, in parallel only for large tensors where the
        // thread-pool overhead pays off.
        if input.len() >= SIGMOID_PARALLEL_THRESHOLD {
            output.par_mapv_inplace(sigmoid_fn);
        } else {
            output.mapv_inplace(sigmoid_fn);
        }
        // Cache the activation for the backward pass.
        self.output_cache = Some(output.clone());
        Ok(output)
    }

    fn backward(&mut self, grad_output: &Tensor) -> Result<Tensor, ModelError> {
        if let Some(output) = &self.output_cache {
            if grad_output.shape() != output.shape() {
                return Err(ModelError::ProcessingError(format!(
                    "Gradient output shape {:?} doesn't match output shape {:?}",
                    grad_output.shape(),
                    output.shape()
                )));
            }
            if grad_output.iter().any(|&x| x.is_nan() || x.is_infinite()) {
                return Err(ModelError::InputValidationError(
                    "Gradient output contains NaN or infinite values".to_string(),
                ));
            }

            let mut grad_input = grad_output.clone();
            // Chain rule: the sigmoid derivative is `out * (1 - out)`,
            // computed from the cached forward output. Non-finite results
            // are zeroed and the rest are clipped to keep gradients bounded.
            let gradient_fn = |grad: &mut f32, &out: &f32| {
                let derivative = out * (1.0 - out);
                *grad *= derivative;
                if grad.is_nan() || grad.is_infinite() {
                    *grad = 0.0;
                } else {
                    *grad = grad.clamp(-GRAD_CLIP_VALUE, GRAD_CLIP_VALUE);
                }
            };
            if grad_output.len() >= SIGMOID_PARALLEL_THRESHOLD {
                Zip::from(&mut grad_input)
                    .and(output)
                    .par_for_each(gradient_fn);
            } else {
                Zip::from(&mut grad_input).and(output).for_each(gradient_fn);
            }
            Ok(grad_input)
        } else {
            Err(ModelError::ProcessingError(
                "Forward pass has not been run yet".to_string(),
            ))
        }
    }

    fn layer_type(&self) -> &str {
        "Sigmoid"
    }

    fn output_shape(&self) -> String {
        format_output_shape(&self.output_cache)
    }

    no_trainable_parameters_layer_functions!();
}

impl ActivationLayer for Sigmoid {}
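
#[cfg(test)]
mod tests {
    // A minimal sketch of the forward/backward contract. It assumes `Tensor`
    // is an ndarray dynamic-dimension array (e.g. `ArrayD<f32>`) that can be
    // built with `from_shape_vec`; adjust construction to the crate's real alias.
    use super::*;

    #[test]
    fn forward_then_backward_applies_sigmoid_derivative() {
        let mut layer = Sigmoid::new();
        let input = Tensor::from_shape_vec(vec![1, 2], vec![0.0, 2.0]).unwrap();
        let output = layer.forward(&input).unwrap();
        // sigmoid(0) = 0.5 exactly.
        assert!((output[[0, 0]] - 0.5).abs() < 1e-6);

        // With a unit upstream gradient, backward should return
        // `out * (1 - out)`; at sigmoid(0) = 0.5 this is 0.25.
        let grad_output = Tensor::from_shape_vec(vec![1, 2], vec![1.0, 1.0]).unwrap();
        let grad_input = layer.backward(&grad_output).unwrap();
        assert!((grad_input[[0, 0]] - 0.25).abs() < 1e-6);
    }

    #[test]
    fn backward_before_forward_is_an_error() {
        // `backward` requires a cached forward output and should fail without one.
        let mut layer = Sigmoid::new();
        let grad = Tensor::from_shape_vec(vec![1], vec![1.0]).unwrap();
        assert!(layer.backward(&grad).is_err());
    }
}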