//! Dropout layer implementation
//!
//! This module provides implementation of dropout regularization
//! for neural networks as described in "Dropout: A Simple Way to Prevent Neural Networks
//! from Overfitting" by Srivastava et al.
use crate::error::{NeuralError, Result};
use crate::layers::Layer;
use ndarray::{Array, IxDyn, ScalarOperand};
use num_traits::Float;
use rand::{Rng, RngCore, SeedableRng};
use std::fmt::Debug;
use std::marker::PhantomData;
use std::sync::{Arc, RwLock};
/// Dropout layer
///
/// During training, randomly sets input elements to zero with probability `p` and
/// scales the remaining elements by 1/(1-p) to maintain the expected value
/// (inverted dropout). During inference, the input is passed through unchanged.
/// # Examples
/// ```
/// use scirs2_neural::layers::{Dropout, Layer};
/// use ndarray::Array2;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// // Create a dropout layer with 0.5 dropout probability
/// let mut rng = SmallRng::seed_from_u64(42);
/// let dropout = Dropout::new(0.5, &mut rng).unwrap();
/// // Forward pass with a batch of 2 samples, 10 features
/// let batch_size = 2;
/// let features = 10;
/// let input = Array2::<f64>::from_elem((batch_size, features), 1.0).into_dyn();
/// // Forward pass in training mode (some values will be dropped)
/// let output = dropout.forward(&input).unwrap();
/// // Output shape should match input shape
/// assert_eq!(output.shape(), input.shape());
/// ```
pub struct Dropout<F: Float + Debug + Send + Sync> {
/// Probability of dropping an element
p: F,
/// Random number generator
rng: Arc<RwLock<Box<dyn RngCore + Send + Sync>>>,
/// Whether we're in training mode
training: bool,
/// Input cache for backward pass
input_cache: Arc<RwLock<Option<Array<F, IxDyn>>>>,
/// Mask cache for backward pass (1 for kept elements, 0 for dropped)
mask_cache: Arc<RwLock<Option<Array<F, IxDyn>>>>,
/// Phantom data for type parameter
_phantom: PhantomData<F>,
}
// Manual implementation of Debug because dyn RngCore doesn't implement Debug
impl<F: Float + Debug + Send + Sync> std::fmt::Debug for Dropout<F> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Dropout")
.field("p", &self.p)
.field("rng", &"<dyn RngCore>")
.field("training", &self.training)
.finish()
}
}
// Manual implementation of Clone
impl<F: Float + Debug + Send + Sync> Clone for Dropout<F> {
fn clone(&self) -> Self {
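// A clone gets a fresh, deterministically seeded RNG (the boxed RNG cannot be
// shared or cloned) and empty caches; it does not reproduce the source's RNG state.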
let rng = rand::rngs::SmallRng::seed_from_u64(42);
Self {
p: self.p,
rng: Arc::new(RwLock::new(Box::new(rng))),
training: self.training,
input_cache: Arc::new(RwLock::new(None)),
mask_cache: Arc::new(RwLock::new(None)),
_phantom: PhantomData,
}
}
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> Dropout<F> {
/// Create a new dropout layer
///
/// # Arguments
/// * `p` - Dropout probability (0.0 to 1.0)
/// * `rng` - Random number generator
/// # Returns
/// * A new dropout layer
pub fn new<R: Rng + 'static + Clone + Send + Sync>(p: f64, rng: &mut R) -> Result<Self> {
if !(0.0..1.0).contains(&p) {
return Err(NeuralError::InvalidArchitecture(
"Dropout probability must be in [0, 1)".to_string(),
));
}
let p = F::from(p).ok_or_else(|| {
NeuralError::InvalidArchitecture(
"Failed to convert dropout probability to type F".to_string(),
)
})?;
Ok(Self {
p,
rng: Arc::new(RwLock::new(Box::new(rng.clone()))),
training: true,
input_cache: Arc::new(RwLock::new(None)),
mask_cache: Arc::new(RwLock::new(None)),
_phantom: PhantomData,
})
}
/// Set the training mode
/// In training mode, elements are randomly dropped.
/// In inference mode, the input passes through unchanged (the 1/(1-p) scaling
/// already happens during training).
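///
/// # Examples
/// ```
/// // Minimal sketch: switch an existing layer to inference mode
/// use scirs2_neural::layers::Dropout;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// let mut rng = SmallRng::seed_from_u64(0);
/// let mut dropout = Dropout::<f64>::new(0.5, &mut rng).unwrap();
/// dropout.set_training(false); // inference: inputs pass through unchanged
/// assert!(!dropout.is_training());
/// ```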
pub fn set_training(&mut self, training: bool) {
self.training = training;
}
/// Get the dropout probability
pub fn p(&self) -> f64 {
self.p.to_f64().unwrap_or(0.0)
}
/// Get the training mode
pub fn is_training(&self) -> bool {
self.training
}
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> Layer<F> for Dropout<F> {
fn as_any(&self) -> &dyn std::any::Any {
self
}
fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
self
}
fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
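// Training: cache the input, sample a keep-mask, zero dropped elements,
// and scale survivors by 1/(1-p) (inverted dropout). Inference: identity.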
// Cache input for backward pass
if let Ok(mut cache) = self.input_cache.write() {
*cache = Some(input.clone());
} else {
return Err(NeuralError::InferenceError(
"Failed to acquire write lock on input cache".to_string(),
if !self.training || self.p == F::zero() {
// In inference mode or with p=0, just pass through the input as is
return Ok(input.clone());
}
// In training mode, create a binary mask and apply it
let mut mask = Array::<F, _>::from_elem(input.dim(), F::one());
let one = F::one();
let zero = F::zero();
// Apply the dropout mask
{
let mut rng_guard = match self.rng.write() {
Ok(guard) => guard,
Err(_) => {
return Err(NeuralError::InferenceError(
"Failed to acquire write lock on RNG".to_string(),
))
}
};
for elem in mask.iter_mut() {
if (**rng_guard).random::<f64>() < self.p.to_f64().unwrap() {
*elem = zero;
}
}
}
// Scale by 1/(1-p) to maintain expected value
let scale = one / (one - self.p);
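// Inverted dropout: each element survives with probability 1-p, so
// E[mask * x * scale] = (1-p) * x / (1-p) = x and inference needs no rescaling.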
// Cache the mask for backward pass
if let Ok(mut cache) = self.mask_cache.write() {
*cache = Some(mask.clone());
} else {
return Err(NeuralError::InferenceError(
"Failed to acquire write lock on mask cache".to_string(),
));
}
// Apply mask and scale
let output = input * &mask * scale;
Ok(output)
}
fn backward(
&self,
_input: &Array<F, IxDyn>,
grad_output: &Array<F, IxDyn>,
) -> Result<Array<F, IxDyn>> {
// In inference mode or with p=0, just pass through the gradient
if !self.training || self.p == F::zero() {
return Ok(grad_output.clone());
}
// Retrieve cached mask
let mask_ref = match self.mask_cache.read() {
Ok(guard) => guard,
Err(_) => {
return Err(NeuralError::InferenceError(
"Failed to acquire read lock on mask cache".to_string(),
))
};
if mask_ref.is_none() {
return Err(NeuralError::InferenceError(
"No cached mask for backward pass. Call forward() first.".to_string(),
));
}
let mask = mask_ref.as_ref().unwrap();
// Scale factor is the same as in the forward pass
let scale = F::one() / (F::one() - self.p);
// Apply mask and scale to the gradient
let grad_input = grad_output * mask * scale;
Ok(grad_input)
}
fn update(&mut self, _learning_rate: F) -> Result<()> {
// Dropout has no parameters to update
Ok(())
}
fn layer_type(&self) -> &str {
"Dropout"
fn parameter_count(&self) -> usize {
// Dropout layer has no trainable parameters
0
}
fn layer_description(&self) -> String {
format!(
"type:Dropout, p:{}, training:{}",
self.p.to_f64().unwrap_or(0.0),
self.training
)
}
fn set_training(&mut self, training: bool) {
self.training = training;
}
fn is_training(&self) -> bool {
self.training
}
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::Array2;
use rand::rngs::SmallRng;
use rand::SeedableRng;
#[test]
fn test_dropout_shape() {
// Set up dropout
let mut rng = SmallRng::seed_from_u64(42);
let dropout = Dropout::<f64>::new(0.5, &mut rng).unwrap();
// Create a batch of inputs
let batch_size = 2;
let features = 10;
let input = Array2::<f64>::from_elem((batch_size, features), 1.0).into_dyn();
// Forward pass
let output = dropout.forward(&input).unwrap();
// Check output shape
assert_eq!(output.shape(), input.shape());
}
#[test]
fn test_dropout_training_mode() {
let mut rng = SmallRng::seed_from_u64(42);
let mut dropout = Dropout::<f64>::new(0.5, &mut rng).unwrap();
// Ensure training mode
dropout.set_training(true);
let batch_size = 100;
let features = 10;
let input = Array2::<f64>::from_elem((batch_size, features), 1.0).into_dyn();
let output = dropout.forward(&input).unwrap();
// Count dropped (zero) elements
let mut dropped_count = 0;
for &val in output.iter() {
if val == 0.0 {
dropped_count += 1;
}
}
// We expect approximately 50% of elements to be dropped
// Allow for some statistical variation
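// With 1000 elements and p = 0.5, the drop-rate standard deviation is
// sqrt(0.5 * 0.5 / 1000) ~= 0.016, so the 0.4..0.6 band is roughly +/-6 sigma.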
let total_elements = batch_size * features;
let drop_rate = dropped_count as f64 / total_elements as f64;
assert!(
drop_rate > 0.4 && drop_rate < 0.6,
"Expected drop rate around 0.5, got {}",
drop_rate
);
}
#[test]
fn test_dropout_inference_mode() {
let mut rng = SmallRng::seed_from_u64(42);
let mut dropout = Dropout::<f64>::new(0.5, &mut rng).unwrap();
// Set to inference mode
dropout.set_training(false);
let input = Array2::<f64>::from_elem((2, 10), 1.0).into_dyn();
let output = dropout.forward(&input).unwrap();
// In inference mode, all elements should pass through unchanged
for &val in output.iter() {
assert_eq!(val, 1.0);
}
}
#[test]
fn test_dropout_zero_probability() {
// Set up dropout with p=0 (no dropout)
let mut rng = SmallRng::seed_from_u64(42);
let dropout = Dropout::<f64>::new(0.0, &mut rng).unwrap();
let input = Array2::<f64>::from_elem((2, 10), 1.0).into_dyn();
let output = dropout.forward(&input).unwrap();
// With p=0, all elements should pass through unchanged
for &val in output.iter() {
assert_eq!(val, 1.0);
}
}
#[test]
fn test_dropout_backward() {
let mut rng = SmallRng::seed_from_u64(42);
let dropout = Dropout::<f64>::new(0.5, &mut rng).unwrap();
let batch_size = 2;
let features = 10;
let input = Array2::<f64>::from_elem((batch_size, features), 1.0).into_dyn();
// Forward pass to create mask
let output = dropout.forward(&input).unwrap();
// Create gradient
let grad_output = Array2::<f64>::from_elem((batch_size, features), 1.0).into_dyn();
// Backward pass
let grad_input = dropout.backward(&input, &grad_output).unwrap();
// Check that grad_input has same shape
assert_eq!(grad_input.shape(), input.shape());
// Elements that were set to zero in the forward pass should also be zero in the backward pass
for (out, grad) in output.iter().zip(grad_input.iter()) {
if *out == 0.0 {
assert_eq!(*grad, 0.0);
} else {
// Non-zero elements get the same gradient scale as in the forward pass
assert_eq!(*grad, 2.0); // scale = 1/(1-0.5) = 2.0
}
}
}
}