use std::sync::Arc;
use ferrotorch_core::autograd::no_grad::is_grad_enabled;
use ferrotorch_core::gpu_dispatch::GpuRngState;
use ferrotorch_core::tensor::GradFn;
use ferrotorch_core::{FerrotorchError, FerrotorchResult, Float, Tensor, TensorStorage};
use crate::module::Module;
use crate::parameter::Parameter;
/// Produces a non-zero 64-bit seed for [`xorshift_next`].
///
/// The seed is a hash of the current wall-clock time, the calling thread's
/// id, and a process-wide call counter. The counter guarantees that two calls
/// made by the same thread within a single clock tick still hash different
/// inputs, so back-to-back dropout layers do not end up sharing a mask on
/// platforms with a coarse system clock.
///
/// # Returns
///
/// A seed that is never `0` (an all-zero xorshift state would stay zero
/// forever); a zero hash is replaced by a fixed non-zero constant.
fn xorshift_seed() -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::SystemTime;

    // Only uniqueness of the counter value matters, not ordering with other
    // memory operations, so `Relaxed` is sufficient.
    static CALL_COUNTER: AtomicUsize = AtomicUsize::new(0);

    let mut hasher = DefaultHasher::new();
    SystemTime::now().hash(&mut hasher);
    std::thread::current().id().hash(&mut hasher);
    CALL_COUNTER.fetch_add(1, Ordering::Relaxed).hash(&mut hasher);

    match hasher.finish() {
        0 => 0xdeadbeefcafe,
        seed => seed,
    }
}
/// Advances `state` by one xorshift step (shift amounts 13, 7, 17) and maps
/// the new state to a float by dividing it by `u64::MAX`.
///
/// The result lies in `[0, 1]`. A state of `0` is a fixed point: it stays `0`
/// and every call returns `0.0`, so callers must seed with a non-zero value.
#[inline]
fn xorshift_next(state: &mut u64) -> f64 {
    let mut s = *state;
    s ^= s << 13;
    s ^= s >> 7;
    s ^= s << 17;
    *state = s;
    s as f64 / u64::MAX as f64
}
/// Multiplier applied to counter word 0 in every Philox round.
#[allow(dead_code)]
const PHILOX_M0: u32 = 0xD2511F53;
/// Multiplier applied to counter word 2 in every Philox round.
#[allow(dead_code)]
const PHILOX_M1: u32 = 0xCD9E8D57;
/// Increment added (wrapping) to key word 0 between rounds.
#[allow(dead_code)]
const PHILOX_W0: u32 = 0x9E3779B9;
/// Increment added (wrapping) to key word 1 between rounds.
#[allow(dead_code)]
const PHILOX_W1: u32 = 0xBB67AE85;

/// Applies a single Philox-4x32 round to the counter words `c0..c3` under the
/// round key `(k0, k1)`.
///
/// Words 0 and 2 are each widened to 64 bits and multiplied by their constant;
/// the high halves are XOR-ed with the neighbouring counter word and a key
/// word, while the low halves pass through into the odd output positions.
#[allow(dead_code)]
#[inline]
fn philox_round(c0: u32, c1: u32, c2: u32, c3: u32, k0: u32, k1: u32) -> (u32, u32, u32, u32) {
    let wide0 = u64::from(PHILOX_M0) * u64::from(c0);
    let wide1 = u64::from(PHILOX_M1) * u64::from(c2);
    (
        ((wide1 >> 32) as u32) ^ c1 ^ k0,
        wide1 as u32,
        ((wide0 >> 32) as u32) ^ c3 ^ k1,
        wide0 as u32,
    )
}

/// Runs ten Philox-4x32 rounds and returns the four resulting 32-bit words.
///
/// `counter` fills the two low counter words (low half first); the two high
/// counter words start at zero. `key` is split the same way into the two key
/// words. The key is bumped by `PHILOX_W0` / `PHILOX_W1` before every round
/// except the first.
#[allow(dead_code)]
fn philox_4x32_10(counter: u64, key: u64) -> [u32; 4] {
    let mut lanes = (counter as u32, (counter >> 32) as u32, 0u32, 0u32);
    let mut round_key = (key as u32, (key >> 32) as u32);
    for round in 0..10 {
        if round != 0 {
            round_key.0 = round_key.0.wrapping_add(PHILOX_W0);
            round_key.1 = round_key.1.wrapping_add(PHILOX_W1);
        }
        lanes = philox_round(lanes.0, lanes.1, lanes.2, lanes.3, round_key.0, round_key.1);
    }
    [lanes.0, lanes.1, lanes.2, lanes.3]
}
/// Rebuilds, on the host, a scaled dropout mask of `numel` entries from the
/// RNG state returned by the GPU dropout call.
///
/// Each entry is `0` when the element's hash is below `threshold` and `scale`
/// otherwise. The hash depends only on the element index and on
/// `rng_state.counter() ^ rng_state.seed()` truncated to 32 bits.
///
/// NOTE(review): despite the name, the per-element hash is a
/// multiply-then-xorshift mix, not `philox_4x32_10`. It presumably mirrors the
/// kernel behind `dropout_philox_f32`; confirm, because a mismatch would make
/// the backward mask disagree with the forward output.
/// NOTE(review): the index is truncated to `u32`, so masks repeat for tensors
/// with more than `u32::MAX` elements.
fn philox_dropout_mask<T: Float>(
    numel: usize,
    threshold: u32,
    scale: T,
    rng_state: &GpuRngState,
) -> Vec<T> {
    let dropped = <T as num_traits::Zero>::zero();
    let stream_key = (rng_state.counter() ^ rng_state.seed()) as u32;

    let mut mask = Vec::with_capacity(numel);
    for idx in 0..numel {
        // Spread the index with a multiplicative hash, fold in the stream key,
        // then scramble with a 32-bit xorshift (13 / 17 / 5).
        let mut bits = (idx as u32).wrapping_mul(2654435761) ^ stream_key;
        bits ^= bits << 13;
        bits ^= bits >> 17;
        bits ^= bits << 5;
        mask.push(if bits < threshold { dropped } else { scale });
    }
    mask
}
/// Autograd node for element-wise dropout.
///
/// Stores the forward input together with the tensor it was multiplied by, so
/// the backward pass can apply the same multiplier to the incoming gradient.
#[derive(Debug)]
struct DropoutBackward<T: Float> {
    /// Tensor the dropout was applied to.
    input: Tensor<T>,
    /// Element-wise multiplier used in the forward pass.
    scaled_mask: Tensor<T>,
}

impl<T: Float> GradFn<T> for DropoutBackward<T> {
    /// Returns `grad_output * scaled_mask` for the single input, or `None`
    /// when the input does not require a gradient.
    fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
        if !self.input.requires_grad() {
            return Ok(vec![None]);
        }
        let grad_input =
            ferrotorch_core::grad_fns::arithmetic::mul(grad_output, &self.scaled_mask)?;
        Ok(vec![Some(grad_input)])
    }

    /// The node has exactly one input: the tensor dropout was applied to.
    fn inputs(&self) -> Vec<&Tensor<T>> {
        let only_input = &self.input;
        vec![only_input]
    }

    /// Name of this node.
    fn name(&self) -> &'static str {
        "DropoutBackward"
    }
}
/// Autograd node for the CPU channel-wise dropout layers.
///
/// Keeps the forward input and the flat, already expanded multiplier mask as a
/// host vector. The backward pass is CPU-only.
#[derive(Debug)]
struct Dropout2dBackward<T: Float> {
    /// Tensor the dropout was applied to.
    input: Tensor<T>,
    /// One multiplier per element, in the same flat order as the input data.
    scaled_mask: Vec<T>,
}

impl<T: Float> GradFn<T> for Dropout2dBackward<T> {
    /// Multiplies the incoming gradient element-wise by the stored mask.
    ///
    /// # Errors
    ///
    /// Returns `NotImplementedOnCuda` when `grad_output` lives on a CUDA
    /// device, and propagates any error from reading the gradient data or
    /// building the result tensor.
    fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
        if grad_output.is_cuda() {
            return Err(FerrotorchError::NotImplementedOnCuda {
                op: "dropout2d backward",
            });
        }
        if !self.input.requires_grad() {
            return Ok(vec![None]);
        }

        let upstream = grad_output.data_vec()?;
        let mut masked: Vec<T> = Vec::with_capacity(upstream.len().min(self.scaled_mask.len()));
        for (&g, &m) in upstream.iter().zip(self.scaled_mask.iter()) {
            masked.push(g * m);
        }

        let grad_input = Tensor::from_storage(
            TensorStorage::cpu(masked),
            self.input.shape().to_vec(),
            false,
        )?;
        Ok(vec![Some(grad_input)])
    }

    /// The node has exactly one input: the tensor dropout was applied to.
    fn inputs(&self) -> Vec<&Tensor<T>> {
        let only_input = &self.input;
        vec![only_input]
    }

    /// Name of this node.
    fn name(&self) -> &'static str {
        "Dropout2dBackward"
    }
}
/// Element-wise dropout layer.
///
/// Holds the drop probability and a training flag; the element type `T` is
/// carried only as a marker.
#[derive(Debug)]
pub struct Dropout<T: Float> {
    /// Probability of zeroing an element, in `[0, 1)`.
    p: f64,
    /// Whether the layer is in training mode.
    training: bool,
    _marker: std::marker::PhantomData<T>,
}

impl<T: Float> Dropout<T> {
    /// Creates a dropout layer in training mode.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` unless `0 <= p < 1` (NaN is rejected too).
    pub fn new(p: f64) -> FerrotorchResult<Self> {
        if (0.0..1.0).contains(&p) {
            Ok(Self {
                p,
                training: true,
                _marker: std::marker::PhantomData,
            })
        } else {
            Err(FerrotorchError::InvalidArgument {
                message: format!("dropout probability must be in [0, 1), got {p}"),
            })
        }
    }
}
impl<T: Float> Module<T> for Dropout<T> {
    /// Applies dropout to `input`.
    ///
    /// In eval mode, or when `p == 0`, the input is returned as a clone.
    /// Otherwise every element is either zeroed or multiplied by
    /// `1 / (1 - p)`. When gradients are enabled and the input requires them,
    /// the result carries a `DropoutBackward` node holding the multiplier mask.
    ///
    /// # Errors
    ///
    /// Propagates errors from the GPU backend, from reading tensor data and
    /// from tensor construction.
    fn forward(&self, input: &Tensor<T>) -> FerrotorchResult<Tensor<T>> {
        if self.p == 0.0 || !self.training {
            return Ok(input.clone());
        }

        let numel = input.numel();
        let keep_scale = T::from(1.0 / (1.0 - self.p)).unwrap();
        let dropped = <T as num_traits::Zero>::zero();

        // CUDA path: the backend performs the dropout and hands back the RNG
        // state it used. If no backend is registered, control falls through to
        // the CPU code below.
        // NOTE(review): this always calls the `f32` kernel regardless of `T` —
        // confirm non-f32 CUDA tensors cannot reach this branch.
        if input.is_cuda() {
            if let Some(backend) = ferrotorch_core::gpu_dispatch::gpu_backend() {
                let threshold = (self.p * u32::MAX as f64) as u32;
                let scale_f32 = 1.0f32 / (1.0 - self.p as f32);
                let (handle, rng_state) =
                    backend.dropout_philox_f32(input.gpu_handle()?, threshold, scale_f32)?;

                if !(is_grad_enabled() && input.requires_grad()) {
                    return Tensor::from_storage(
                        TensorStorage::gpu(handle),
                        input.shape().to_vec(),
                        false,
                    );
                }

                // Rebuild the mask on the host from the returned RNG state and
                // move it to the input's device for use in the backward pass.
                let host_mask = Tensor::from_storage(
                    TensorStorage::cpu(philox_dropout_mask(
                        numel,
                        threshold,
                        keep_scale,
                        &rng_state,
                    )),
                    input.shape().to_vec(),
                    false,
                )?;
                let scaled_mask = host_mask.to(input.device())?;
                return Tensor::from_operation(
                    TensorStorage::gpu(handle),
                    input.shape().to_vec(),
                    Arc::new(DropoutBackward {
                        input: input.clone(),
                        scaled_mask,
                    }),
                );
            }
        }

        // CPU path: one xorshift draw per element decides drop vs. keep.
        let mut rng = xorshift_seed();
        let mut mask_values: Vec<T> = Vec::with_capacity(numel);
        for _ in 0..numel {
            let multiplier = if xorshift_next(&mut rng) < self.p {
                dropped
            } else {
                keep_scale
            };
            mask_values.push(multiplier);
        }

        let source = input.data()?;
        let output_data: Vec<T> = source
            .iter()
            .zip(mask_values.iter())
            .map(|(&value, &multiplier)| value * multiplier)
            .collect();

        if !(is_grad_enabled() && input.requires_grad()) {
            return Tensor::from_storage(
                TensorStorage::cpu(output_data),
                input.shape().to_vec(),
                false,
            );
        }

        let scaled_mask = Tensor::from_storage(
            TensorStorage::cpu(mask_values),
            input.shape().to_vec(),
            false,
        )?;
        Tensor::from_operation(
            TensorStorage::cpu(output_data),
            input.shape().to_vec(),
            Arc::new(DropoutBackward {
                input: input.clone(),
                scaled_mask,
            }),
        )
    }

    /// Dropout has no learnable parameters.
    fn parameters(&self) -> Vec<&Parameter<T>> {
        Vec::new()
    }

    /// Dropout has no learnable parameters.
    fn parameters_mut(&mut self) -> Vec<&mut Parameter<T>> {
        Vec::new()
    }

    /// Dropout has no learnable parameters.
    fn named_parameters(&self) -> Vec<(String, &Parameter<T>)> {
        Vec::new()
    }

    /// Switches the layer to training mode (dropout active).
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode (forward becomes the identity).
    fn eval(&mut self) {
        self.training = false;
    }

    /// Reports whether the layer is in training mode.
    fn is_training(&self) -> bool {
        self.training
    }
}
/// Shared CPU forward pass for the channel-wise dropout layers
/// ([`Dropout1d`], [`Dropout2d`], [`Dropout3d`]).
///
/// Treats `input` as `shape[0] * shape[1]` planes, each holding the product of
/// the remaining dimensions. One random draw per plane decides whether the
/// whole plane is zeroed or multiplied by `1 / (1 - p)`. When gradients are
/// enabled and the input requires them, the result carries a
/// `Dropout2dBackward` node holding the expanded mask (all three layers share
/// that node type, so its reported name is the same for each of them).
///
/// The caller must already have checked that `input` has at least two
/// dimensions and that `p` lies in `[0, 1)`.
///
/// # Errors
///
/// Returns `NotImplementedOnCuda { op }` for CUDA tensors and propagates any
/// error from reading the input data or constructing the output tensor.
fn channel_dropout_forward<T: Float>(
    input: &Tensor<T>,
    p: f64,
    op: &'static str,
) -> FerrotorchResult<Tensor<T>> {
    if input.is_cuda() {
        return Err(FerrotorchError::NotImplementedOnCuda { op });
    }

    let shape = input.shape();
    let planes = shape[0] * shape[1];
    // An empty tail (2-D input) yields a product of 1: one element per plane.
    let plane_len: usize = shape[2..].iter().product();
    let scale = T::from(1.0 / (1.0 - p)).unwrap();
    let zero = <T as num_traits::Zero>::zero();

    // One draw per plane, expanded to every element of that plane.
    let mut state = xorshift_seed();
    let mut scaled_mask: Vec<T> = Vec::with_capacity(input.numel());
    for _ in 0..planes {
        let multiplier = if xorshift_next(&mut state) >= p {
            scale
        } else {
            zero
        };
        scaled_mask.extend(std::iter::repeat(multiplier).take(plane_len));
    }

    let input_data = input.data_vec()?;
    let output_data: Vec<T> = input_data
        .iter()
        .zip(scaled_mask.iter())
        .map(|(&x, &m)| x * m)
        .collect();

    if is_grad_enabled() && input.requires_grad() {
        Tensor::from_operation(
            TensorStorage::cpu(output_data),
            input.shape().to_vec(),
            Arc::new(Dropout2dBackward {
                input: input.clone(),
                scaled_mask,
            }),
        )
    } else {
        Tensor::from_storage(
            TensorStorage::cpu(output_data),
            input.shape().to_vec(),
            false,
        )
    }
}

/// Channel-wise dropout for inputs shaped `[B, C, ...]`.
///
/// Each `(batch, channel)` plane is dropped or kept as a whole.
#[derive(Debug)]
pub struct Dropout2d<T: Float> {
    /// Probability of zeroing a channel, in `[0, 1)`.
    p: f64,
    /// Whether the layer is in training mode.
    training: bool,
    _marker: std::marker::PhantomData<T>,
}

impl<T: Float> Dropout2d<T> {
    /// Creates the layer in training mode.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` unless `0 <= p < 1`.
    pub fn new(p: f64) -> FerrotorchResult<Self> {
        if !(0.0..1.0).contains(&p) {
            return Err(FerrotorchError::InvalidArgument {
                message: format!("dropout2d probability must be in [0, 1), got {p}"),
            });
        }
        Ok(Self {
            p,
            training: true,
            _marker: std::marker::PhantomData,
        })
    }
}

impl<T: Float> Module<T> for Dropout2d<T> {
    /// Drops whole channels of `input`.
    ///
    /// Returns a clone of the input in eval mode or when `p == 0`.
    ///
    /// # Errors
    ///
    /// `InvalidArgument` when the input has fewer than two dimensions,
    /// `NotImplementedOnCuda` for CUDA tensors.
    fn forward(&self, input: &Tensor<T>) -> FerrotorchResult<Tensor<T>> {
        if !self.training || self.p == 0.0 {
            return Ok(input.clone());
        }
        let shape = input.shape();
        if shape.len() < 2 {
            return Err(FerrotorchError::InvalidArgument {
                message: format!(
                    "Dropout2d expects at least 2D input [B, C, ...], got shape {:?}",
                    shape
                ),
            });
        }
        channel_dropout_forward(input, self.p, "Dropout2d")
    }

    /// The layer has no learnable parameters.
    fn parameters(&self) -> Vec<&Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn parameters_mut(&mut self) -> Vec<&mut Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn named_parameters(&self) -> Vec<(String, &Parameter<T>)> {
        vec![]
    }

    /// Switches the layer to training mode.
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode.
    fn eval(&mut self) {
        self.training = false;
    }

    /// Reports whether the layer is in training mode.
    fn is_training(&self) -> bool {
        self.training
    }
}

/// Channel-wise dropout for inputs shaped `[B, C, L]`.
///
/// Each `(batch, channel)` row of length `L` is dropped or kept as a whole.
#[derive(Debug)]
pub struct Dropout1d<T: Float> {
    /// Probability of zeroing a channel, in `[0, 1)`.
    p: f64,
    /// Whether the layer is in training mode.
    training: bool,
    _marker: std::marker::PhantomData<T>,
}

impl<T: Float> Dropout1d<T> {
    /// Creates the layer in training mode.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` unless `0 <= p < 1`.
    pub fn new(p: f64) -> FerrotorchResult<Self> {
        if !(0.0..1.0).contains(&p) {
            return Err(FerrotorchError::InvalidArgument {
                message: format!("dropout1d probability must be in [0, 1), got {p}"),
            });
        }
        Ok(Self {
            p,
            training: true,
            _marker: std::marker::PhantomData,
        })
    }
}

impl<T: Float> Module<T> for Dropout1d<T> {
    /// Drops whole channels of a 3-D `input`.
    ///
    /// Returns a clone of the input in eval mode or when `p == 0`.
    ///
    /// # Errors
    ///
    /// `InvalidArgument` when the input is not 3-D, `NotImplementedOnCuda` for
    /// CUDA tensors.
    fn forward(&self, input: &Tensor<T>) -> FerrotorchResult<Tensor<T>> {
        if !self.training || self.p == 0.0 {
            return Ok(input.clone());
        }
        let shape = input.shape();
        if shape.len() != 3 {
            return Err(FerrotorchError::InvalidArgument {
                message: format!(
                    "Dropout1d expects 3D input [B, C, L], got shape {:?}",
                    shape
                ),
            });
        }
        channel_dropout_forward(input, self.p, "Dropout1d")
    }

    /// The layer has no learnable parameters.
    fn parameters(&self) -> Vec<&Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn parameters_mut(&mut self) -> Vec<&mut Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn named_parameters(&self) -> Vec<(String, &Parameter<T>)> {
        vec![]
    }

    /// Switches the layer to training mode.
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode.
    fn eval(&mut self) {
        self.training = false;
    }

    /// Reports whether the layer is in training mode.
    fn is_training(&self) -> bool {
        self.training
    }
}

/// Channel-wise dropout for inputs shaped `[B, C, D, H, W]`.
///
/// Each `(batch, channel)` volume is dropped or kept as a whole.
#[derive(Debug)]
pub struct Dropout3d<T: Float> {
    /// Probability of zeroing a channel, in `[0, 1)`.
    p: f64,
    /// Whether the layer is in training mode.
    training: bool,
    _marker: std::marker::PhantomData<T>,
}

impl<T: Float> Dropout3d<T> {
    /// Creates the layer in training mode.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` unless `0 <= p < 1`.
    pub fn new(p: f64) -> FerrotorchResult<Self> {
        if !(0.0..1.0).contains(&p) {
            return Err(FerrotorchError::InvalidArgument {
                message: format!("dropout3d probability must be in [0, 1), got {p}"),
            });
        }
        Ok(Self {
            p,
            training: true,
            _marker: std::marker::PhantomData,
        })
    }
}

impl<T: Float> Module<T> for Dropout3d<T> {
    /// Drops whole channels of a 5-D `input`.
    ///
    /// Returns a clone of the input in eval mode or when `p == 0`.
    ///
    /// # Errors
    ///
    /// `InvalidArgument` when the input is not 5-D, `NotImplementedOnCuda` for
    /// CUDA tensors.
    fn forward(&self, input: &Tensor<T>) -> FerrotorchResult<Tensor<T>> {
        if !self.training || self.p == 0.0 {
            return Ok(input.clone());
        }
        let shape = input.shape();
        if shape.len() != 5 {
            return Err(FerrotorchError::InvalidArgument {
                message: format!(
                    "Dropout3d expects 5D input [B, C, D, H, W], got shape {:?}",
                    shape
                ),
            });
        }
        channel_dropout_forward(input, self.p, "Dropout3d")
    }

    /// The layer has no learnable parameters.
    fn parameters(&self) -> Vec<&Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn parameters_mut(&mut self) -> Vec<&mut Parameter<T>> {
        vec![]
    }

    /// The layer has no learnable parameters.
    fn named_parameters(&self) -> Vec<(String, &Parameter<T>)> {
        vec![]
    }

    /// Switches the layer to training mode.
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode.
    fn eval(&mut self) {
        self.training = false;
    }

    /// Reports whether the layer is in training mode.
    fn is_training(&self) -> bool {
        self.training
    }
}
/// Alpha dropout layer.
///
/// Holds the drop probability and a training flag; the element type `T` is
/// carried only as a marker.
#[derive(Debug)]
pub struct AlphaDropout<T: Float> {
    /// Probability of dropping an element, in `[0, 1)`.
    p: f64,
    /// Whether the layer is in training mode.
    training: bool,
    _marker: std::marker::PhantomData<T>,
}

/// SELU `alpha` constant; the forward pass combines it with [`SELU_LAMBDA`] to
/// obtain the value that dropped elements are set to before rescaling.
const SELU_ALPHA: f64 = 1.6732632423543772;
/// SELU `lambda` (scale) constant.
const SELU_LAMBDA: f64 = 1.0507009873554805;

impl<T: Float> AlphaDropout<T> {
    /// Creates an alpha-dropout layer in training mode.
    ///
    /// # Errors
    ///
    /// Returns `InvalidArgument` unless `0 <= p < 1` (NaN is rejected too).
    pub fn new(p: f64) -> FerrotorchResult<Self> {
        if (0.0..1.0).contains(&p) {
            Ok(Self {
                p,
                training: true,
                _marker: std::marker::PhantomData,
            })
        } else {
            Err(FerrotorchError::InvalidArgument {
                message: format!("alpha_dropout probability must be in [0, 1), got {p}"),
            })
        }
    }
}
/// Autograd node for [`AlphaDropout`].
///
/// Keeps the forward input and a flat per-element gradient multiplier as a
/// host vector. The backward pass is CPU-only.
#[derive(Debug)]
struct AlphaDropoutBackward<T: Float> {
    /// Tensor the dropout was applied to.
    input: Tensor<T>,
    /// One gradient multiplier per element, in flat data order.
    grad_mask: Vec<T>,
}

impl<T: Float> GradFn<T> for AlphaDropoutBackward<T> {
    /// Multiplies the incoming gradient element-wise by the stored mask.
    ///
    /// # Errors
    ///
    /// Returns `NotImplementedOnCuda` when `grad_output` lives on a CUDA
    /// device, and propagates any error from reading the gradient data or
    /// building the result tensor.
    fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
        if grad_output.is_cuda() {
            return Err(FerrotorchError::NotImplementedOnCuda {
                op: "AlphaDropout backward",
            });
        }
        if !self.input.requires_grad() {
            return Ok(vec![None]);
        }

        let upstream = grad_output.data_vec()?;
        let mut masked: Vec<T> = Vec::with_capacity(upstream.len().min(self.grad_mask.len()));
        for (&g, &m) in upstream.iter().zip(self.grad_mask.iter()) {
            masked.push(g * m);
        }

        let grad_input = Tensor::from_storage(
            TensorStorage::cpu(masked),
            self.input.shape().to_vec(),
            false,
        )?;
        Ok(vec![Some(grad_input)])
    }

    /// The node has exactly one input: the tensor dropout was applied to.
    fn inputs(&self) -> Vec<&Tensor<T>> {
        let only_input = &self.input;
        vec![only_input]
    }

    /// Name of this node.
    fn name(&self) -> &'static str {
        "AlphaDropoutBackward"
    }
}
impl<T: Float> Module<T> for AlphaDropout<T> {
    /// Applies alpha dropout to `input` (CPU only).
    ///
    /// In eval mode, or when `p == 0`, the input is returned as a clone.
    /// Otherwise kept elements become `a * x + b` and dropped elements become
    /// `a * alpha' + b`, where `alpha' = -SELU_LAMBDA * SELU_ALPHA`,
    /// `a = 1 / sqrt(q + alpha'^2 * p * q)`, `b = -a * p * alpha'` and
    /// `q = 1 - p`. The recorded gradient multiplier is `a` for kept elements
    /// and `0` for dropped ones.
    ///
    /// # Errors
    ///
    /// Returns `NotImplementedOnCuda` for CUDA tensors and propagates errors
    /// from reading the input data or constructing the output tensor.
    fn forward(&self, input: &Tensor<T>) -> FerrotorchResult<Tensor<T>> {
        if self.p == 0.0 || !self.training {
            return Ok(input.clone());
        }
        if input.is_cuda() {
            return Err(FerrotorchError::NotImplementedOnCuda { op: "AlphaDropout" });
        }

        let numel = input.numel();
        let drop_prob = self.p;
        let keep_prob = 1.0 - drop_prob;
        let alpha_prime = -SELU_LAMBDA * SELU_ALPHA;

        // Affine correction computed in f64, then narrowed to `T`.
        let a_f64 = 1.0 / (keep_prob + alpha_prime * alpha_prime * drop_prob * keep_prob).sqrt();
        let b_f64 = -a_f64 * drop_prob * alpha_prime;
        let a = T::from(a_f64).unwrap();
        let b = T::from(b_f64).unwrap();
        let alpha_prime_t = T::from(alpha_prime).unwrap();
        let zero = <T as num_traits::Zero>::zero();

        // All keep/drop decisions are drawn up front, one per element.
        let mut rng = xorshift_seed();
        let mut keep: Vec<bool> = Vec::with_capacity(numel);
        for _ in 0..numel {
            keep.push(xorshift_next(&mut rng) >= drop_prob);
        }

        // Every dropped element maps to the same value, so compute it once.
        let dropped_value = a * alpha_prime_t + b;
        let input_data = input.data()?;
        let (output_data, grad_mask): (Vec<T>, Vec<T>) = input_data
            .iter()
            .enumerate()
            .map(|(i, &x)| {
                if keep[i] {
                    (a * x + b, a)
                } else {
                    (dropped_value, zero)
                }
            })
            .unzip();

        if !(is_grad_enabled() && input.requires_grad()) {
            return Tensor::from_storage(
                TensorStorage::cpu(output_data),
                input.shape().to_vec(),
                false,
            );
        }
        Tensor::from_operation(
            TensorStorage::cpu(output_data),
            input.shape().to_vec(),
            Arc::new(AlphaDropoutBackward {
                input: input.clone(),
                grad_mask,
            }),
        )
    }

    /// Alpha dropout has no learnable parameters.
    fn parameters(&self) -> Vec<&Parameter<T>> {
        Vec::new()
    }

    /// Alpha dropout has no learnable parameters.
    fn parameters_mut(&mut self) -> Vec<&mut Parameter<T>> {
        Vec::new()
    }

    /// Alpha dropout has no learnable parameters.
    fn named_parameters(&self) -> Vec<(String, &Parameter<T>)> {
        Vec::new()
    }

    /// Switches the layer to training mode (dropout active).
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode (forward becomes the identity).
    fn eval(&mut self) {
        self.training = false;
    }

    /// Reports whether the layer is in training mode.
    fn is_training(&self) -> bool {
        self.training
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ----- shared helpers ---------------------------------------------------

    /// Builds a CPU `f32` tensor from `data` with the given `shape`.
    fn leaf_tensor(data: &[f32], shape: &[usize], requires_grad: bool) -> Tensor<f32> {
        Tensor::from_storage(
            TensorStorage::cpu(data.to_vec()),
            shape.to_vec(),
            requires_grad,
        )
        .unwrap()
    }

    /// Gradient node of a scalar "sum" loss: it ignores the incoming gradient
    /// and passes an all-ones tensor to its single input.
    #[derive(Debug)]
    struct SumBackward<T: Float> {
        input: Tensor<T>,
    }

    impl<T: Float> GradFn<T> for SumBackward<T> {
        fn backward(
            &self,
            _grad_output: &Tensor<T>,
        ) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
            let ones = vec![<T as num_traits::One>::one(); self.input.numel()];
            let t = Tensor::from_storage(
                TensorStorage::cpu(ones),
                self.input.shape().to_vec(),
                false,
            )?;
            Ok(vec![Some(t)])
        }

        fn inputs(&self) -> Vec<&Tensor<T>> {
            vec![&self.input]
        }

        fn name(&self) -> &'static str {
            "SumBackward"
        }
    }

    /// Wraps `output` in a scalar loss equal to the sum of its elements, wired
    /// to `output` through [`SumBackward`].
    fn sum_loss(output: &Tensor<f32>) -> Tensor<f32> {
        let total: f32 = output.data().unwrap().iter().sum();
        Tensor::from_operation(
            TensorStorage::cpu(vec![total]),
            vec![],
            Arc::new(SumBackward {
                input: output.clone(),
            }),
        )
        .unwrap()
    }

    /// Compile-time check that `T` is `Send + Sync`.
    fn assert_send_sync<T: Send + Sync>() {}

    /// Asserts that `data` consists of exactly `planes` planes of `plane_len`
    /// elements, each plane uniform and equal to either `0.0` or `2.0`.
    fn assert_planes_uniform(data: &[f32], planes: usize, plane_len: usize) {
        assert_eq!(data.len(), planes * plane_len);
        for (idx, plane) in data.chunks_exact(plane_len).enumerate() {
            let first = plane[0];
            assert!(
                plane.iter().all(|&x| (x - first).abs() < 1e-6),
                "plane {idx} is not uniform: first={first}, plane={plane:?}"
            );
            assert!(
                first == 0.0 || (first - 2.0).abs() < 1e-6,
                "plane value = {first}, expected 0.0 or 2.0"
            );
        }
    }

    /// Fraction of planes (of `plane_len` elements each) whose first element
    /// is exactly zero.
    fn dropped_plane_rate(data: &[f32], plane_len: usize) -> f64 {
        let planes = data.len() / plane_len;
        let dropped = data
            .chunks_exact(plane_len)
            .filter(|plane| plane[0] == 0.0)
            .count();
        dropped as f64 / planes as f64
    }

    /// Asserts that an output element is zero exactly when its gradient is.
    fn assert_zero_pattern_matches(out_data: &[f32], grad_data: &[f32]) {
        for (i, (&o, &g)) in out_data.iter().zip(grad_data.iter()).enumerate() {
            assert_eq!(
                o == 0.0,
                g == 0.0,
                "mismatch at index {i}: output={o}, grad={g}"
            );
        }
    }

    // The channel-rate tests below sample 10_000 channels: at p = 0.5 the
    // standard deviation of the observed rate is 0.005, so the 0.05 tolerance
    // is ten sigma and the tests cannot realistically fail by chance.
    const RATE_TEST_CHANNELS: usize = 10_000;

    // ----- Dropout ----------------------------------------------------------

    #[test]
    fn test_dropout_rate_approximately_correct() {
        let d = Dropout::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[100_000]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        let zeros = data.iter().filter(|&&x| x == 0.0).count();
        let rate = zeros as f64 / data.len() as f64;
        assert!(
            (rate - 0.5).abs() < 0.05,
            "dropout rate = {rate}, expected ~0.5"
        );
        let non_zero: Vec<f32> = data.iter().copied().filter(|&x| x != 0.0).collect();
        assert!(!non_zero.is_empty());
        for &v in &non_zero {
            assert!(
                (v - 2.0).abs() < 1e-6,
                "surviving element = {v}, expected 2.0"
            );
        }
    }

    #[test]
    fn test_dropout_eval_is_identity() {
        let mut d = Dropout::<f32>::new(0.5).unwrap();
        d.eval();
        assert!(!d.is_training());
        let input = ferrotorch_core::ones::<f32>(&[100]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_dropout_zero_prob_is_identity() {
        let d = Dropout::<f32>::new(0.0).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[100]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_dropout_invalid_p() {
        assert!(Dropout::<f32>::new(1.0).is_err());
        assert!(Dropout::<f32>::new(-0.1).is_err());
        assert!(Dropout::<f32>::new(1.5).is_err());
    }

    #[test]
    fn test_dropout_backward_routes_through_surviving() {
        let d = Dropout::<f32>::new(0.5).unwrap();
        let input = leaf_tensor(&[1.0; 1000], &[1000], true);
        let output = d.forward(&input).unwrap();
        sum_loss(&output).backward().unwrap();

        let grad = input.grad().unwrap().unwrap();
        let grad_data = grad.data().unwrap();
        for &g in grad_data {
            assert!(
                g == 0.0 || (g - 2.0).abs() < 1e-6,
                "gradient element = {g}, expected 0.0 or 2.0"
            );
        }
        let out_data = output.data().unwrap();
        assert_zero_pattern_matches(out_data, grad_data);
    }

    #[test]
    fn test_dropout_no_parameters() {
        let d = Dropout::<f32>::new(0.3).unwrap();
        assert!(d.parameters().is_empty());
        assert!(d.named_parameters().is_empty());
    }

    #[test]
    fn test_dropout_train_eval_toggle() {
        let mut d = Dropout::<f32>::new(0.5).unwrap();
        assert!(d.is_training());
        d.eval();
        assert!(!d.is_training());
        d.train();
        assert!(d.is_training());
    }

    #[test]
    fn test_dropout_is_send_sync() {
        assert_send_sync::<Dropout<f32>>();
        assert_send_sync::<Dropout<f64>>();
    }

    // ----- Dropout2d --------------------------------------------------------

    #[test]
    fn test_dropout2d_drops_whole_channels() {
        let d = Dropout2d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[2, 10, 4, 4]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        assert_planes_uniform(data, 2 * 10, 4 * 4);
    }

    #[test]
    fn test_dropout2d_rate_approximately_correct() {
        let d = Dropout2d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[1, RATE_TEST_CHANNELS, 2, 2]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        let rate = dropped_plane_rate(data, 2 * 2);
        assert!(
            (rate - 0.5).abs() < 0.05,
            "dropout2d rate = {rate}, expected ~0.5"
        );
    }

    #[test]
    fn test_dropout2d_eval_is_identity() {
        let mut d = Dropout2d::<f32>::new(0.5).unwrap();
        d.eval();
        let input = ferrotorch_core::ones::<f32>(&[2, 3, 4, 4]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_dropout2d_invalid_p() {
        assert!(Dropout2d::<f32>::new(1.0).is_err());
        assert!(Dropout2d::<f32>::new(-0.1).is_err());
    }

    #[test]
    fn test_dropout2d_requires_2d_input() {
        let d = Dropout2d::<f32>::new(0.3).unwrap();
        let input_1d = ferrotorch_core::ones::<f32>(&[10]).unwrap();
        assert!(d.forward(&input_1d).is_err());
    }

    #[test]
    fn test_dropout2d_backward_routes_through_surviving_channels() {
        let d = Dropout2d::<f32>::new(0.5).unwrap();
        let input = leaf_tensor(&[1.0; 20 * 3 * 3], &[1, 20, 3, 3], true);
        let output = d.forward(&input).unwrap();
        sum_loss(&output).backward().unwrap();

        let grad = input.grad().unwrap().unwrap();
        let grad_data = grad.data().unwrap();
        let out_data = output.data().unwrap();
        assert_zero_pattern_matches(out_data, grad_data);

        let spatial = 3 * 3;
        for c in 0..20 {
            let start = c * spatial;
            let end = start + spatial;
            let channel_grad = &grad_data[start..end];
            let first = channel_grad[0];
            assert!(
                channel_grad.iter().all(|&g| (g - first).abs() < 1e-6),
                "gradient channel {c} is not uniform"
            );
        }
    }

    #[test]
    fn test_dropout2d_no_parameters() {
        let d = Dropout2d::<f32>::new(0.3).unwrap();
        assert!(d.parameters().is_empty());
        assert!(d.named_parameters().is_empty());
    }

    #[test]
    fn test_dropout2d_is_send_sync() {
        assert_send_sync::<Dropout2d<f32>>();
        assert_send_sync::<Dropout2d<f64>>();
    }

    // ----- Dropout1d --------------------------------------------------------

    #[test]
    fn test_dropout1d_drops_whole_channels() {
        let d = Dropout1d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[2, 10, 8]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        assert_planes_uniform(data, 2 * 10, 8);
    }

    #[test]
    fn test_dropout1d_rate_approximately_correct() {
        let d = Dropout1d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[1, RATE_TEST_CHANNELS, 4]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        let rate = dropped_plane_rate(data, 4);
        assert!(
            (rate - 0.5).abs() < 0.05,
            "dropout1d rate = {rate}, expected ~0.5"
        );
    }

    #[test]
    fn test_dropout1d_eval_is_identity() {
        let mut d = Dropout1d::<f32>::new(0.5).unwrap();
        d.eval();
        let input = ferrotorch_core::ones::<f32>(&[2, 3, 8]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_dropout1d_invalid_p() {
        assert!(Dropout1d::<f32>::new(1.0).is_err());
        assert!(Dropout1d::<f32>::new(-0.1).is_err());
    }

    #[test]
    fn test_dropout1d_requires_3d_input() {
        let d = Dropout1d::<f32>::new(0.3).unwrap();
        let input_2d = ferrotorch_core::ones::<f32>(&[10, 5]).unwrap();
        assert!(d.forward(&input_2d).is_err());
    }

    #[test]
    fn test_dropout1d_no_parameters() {
        let d = Dropout1d::<f32>::new(0.3).unwrap();
        assert!(d.parameters().is_empty());
    }

    #[test]
    fn test_dropout1d_is_send_sync() {
        assert_send_sync::<Dropout1d<f32>>();
        assert_send_sync::<Dropout1d<f64>>();
    }

    // ----- Dropout3d --------------------------------------------------------

    #[test]
    fn test_dropout3d_drops_whole_channels() {
        let d = Dropout3d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[2, 10, 2, 2, 2]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        assert_planes_uniform(data, 2 * 10, 2 * 2 * 2);
    }

    #[test]
    fn test_dropout3d_rate_approximately_correct() {
        let d = Dropout3d::<f32>::new(0.5).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[1, RATE_TEST_CHANNELS, 2, 2, 2]).unwrap();
        let output = d.forward(&input).unwrap();
        let data = output.data().unwrap();
        let rate = dropped_plane_rate(data, 2 * 2 * 2);
        assert!(
            (rate - 0.5).abs() < 0.05,
            "dropout3d rate = {rate}, expected ~0.5"
        );
    }

    #[test]
    fn test_dropout3d_eval_is_identity() {
        let mut d = Dropout3d::<f32>::new(0.5).unwrap();
        d.eval();
        let input = ferrotorch_core::ones::<f32>(&[2, 3, 2, 2, 2]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_dropout3d_invalid_p() {
        assert!(Dropout3d::<f32>::new(1.0).is_err());
        assert!(Dropout3d::<f32>::new(-0.1).is_err());
    }

    #[test]
    fn test_dropout3d_requires_5d_input() {
        let d = Dropout3d::<f32>::new(0.3).unwrap();
        let input_4d = ferrotorch_core::ones::<f32>(&[2, 3, 4, 4]).unwrap();
        assert!(d.forward(&input_4d).is_err());
    }

    #[test]
    fn test_dropout3d_no_parameters() {
        let d = Dropout3d::<f32>::new(0.3).unwrap();
        assert!(d.parameters().is_empty());
    }

    #[test]
    fn test_dropout3d_is_send_sync() {
        assert_send_sync::<Dropout3d<f32>>();
        assert_send_sync::<Dropout3d<f64>>();
    }

    // ----- AlphaDropout -----------------------------------------------------

    #[test]
    fn test_alpha_dropout_preserves_mean_approx() {
        let d = AlphaDropout::<f64>::new(0.5).unwrap();
        let n = 100_000;
        // Inputs are spread evenly over [-0.5, 0.5), so their mean is ~0.
        let data: Vec<f64> = (0..n).map(|i| (i as f64 / n as f64) - 0.5).collect();
        let input_mean: f64 = data.iter().sum::<f64>() / n as f64;
        let input = Tensor::from_storage(TensorStorage::cpu(data), vec![1, n], false).unwrap();
        let output = d.forward(&input).unwrap();
        let out_data = output.data().unwrap();
        let out_mean: f64 = out_data.iter().sum::<f64>() / n as f64;
        assert!(
            (out_mean - input_mean).abs() < 0.05,
            "AlphaDropout mean = {out_mean}, input mean = {input_mean}"
        );
    }

    #[test]
    fn test_alpha_dropout_eval_is_identity() {
        let mut d = AlphaDropout::<f32>::new(0.5).unwrap();
        d.eval();
        let input = ferrotorch_core::ones::<f32>(&[100]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_alpha_dropout_zero_prob_is_identity() {
        let d = AlphaDropout::<f32>::new(0.0).unwrap();
        let input = ferrotorch_core::ones::<f32>(&[100]).unwrap();
        let output = d.forward(&input).unwrap();
        assert!(output.is_same(&input));
    }

    #[test]
    fn test_alpha_dropout_invalid_p() {
        assert!(AlphaDropout::<f32>::new(1.0).is_err());
        assert!(AlphaDropout::<f32>::new(-0.1).is_err());
        assert!(AlphaDropout::<f32>::new(1.5).is_err());
    }

    #[test]
    fn test_alpha_dropout_no_parameters() {
        let d = AlphaDropout::<f32>::new(0.3).unwrap();
        assert!(d.parameters().is_empty());
    }

    #[test]
    fn test_alpha_dropout_backward_routes_gradient() {
        let d = AlphaDropout::<f32>::new(0.5).unwrap();
        let input = leaf_tensor(&[1.0; 1000], &[1000], true);
        let output = d.forward(&input).unwrap();
        sum_loss(&output).backward().unwrap();

        let grad = input.grad().unwrap().unwrap();
        let grad_data = grad.data().unwrap();
        let seen_zero = grad_data.iter().any(|&g| g == 0.0);
        let seen_nonzero = grad_data.iter().any(|&g| g != 0.0);
        assert!(
            seen_zero,
            "some elements should have zero gradient (dropped)"
        );
        assert!(
            seen_nonzero,
            "some elements should have nonzero gradient (kept)"
        );
    }

    #[test]
    fn test_alpha_dropout_train_eval_toggle() {
        let mut d = AlphaDropout::<f32>::new(0.5).unwrap();
        assert!(d.is_training());
        d.eval();
        assert!(!d.is_training());
        d.train();
        assert!(d.is_training());
    }

    #[test]
    fn test_alpha_dropout_is_send_sync() {
        assert_send_sync::<AlphaDropout<f32>>();
        assert_send_sync::<AlphaDropout<f64>>();
    }
}