Skip to main content

torsh_python/nn/
dropout.rs

1//! Dropout and regularization layers
2
3use super::module::PyModule;
4use crate::{error::PyResult, py_result, tensor::PyTensor};
5use pyo3::prelude::*;
6use std::collections::HashMap;
7
/// Dropout layer.
///
/// Randomly zeroes elements of its input with probability `p` during
/// training and rescales survivors by `1/(1-p)` (inverted dropout, see
/// `forward`); acts as the identity in eval mode.
#[pyclass(name = "Dropout", extends = PyModule)]
pub struct PyDropout {
    // Probability of an element being zeroed; validated to lie in [0, 1].
    p: f32,
    // Requested in-place flag. NOTE(review): stored but not honored —
    // `forward` always returns a new tensor.
    inplace: bool,
    // True while in training mode; dropout is a no-op when false.
    training: bool,
}
15
16#[pymethods]
17impl PyDropout {
18    #[new]
19    fn new(p: Option<f32>, inplace: Option<bool>) -> PyResult<(Self, PyModule)> {
20        let p = p.unwrap_or(0.5);
21        let inplace = inplace.unwrap_or(false);
22
23        if !(0.0..=1.0).contains(&p) {
24            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
25                "dropout probability has to be between 0 and 1, but got {p}",
26            ));
27        }
28
29        Ok((
30            Self {
31                p,
32                inplace,
33                training: true,
34            },
35            PyModule::new(),
36        ))
37    }
38
39    /// Forward pass through dropout
40    fn forward(&mut self, input: &PyTensor) -> PyResult<PyTensor> {
41        // ✅ Proper dropout implementation with random mask
42        if !self.training || self.p == 0.0 {
43            // In eval mode or p=0, return input unchanged
44            return Ok(PyTensor {
45                tensor: input.tensor.clone(),
46            });
47        }
48
49        if self.p == 1.0 {
50            // All values dropped
51            let zeros = py_result!(torsh_tensor::creation::zeros_like(&input.tensor))?;
52            return Ok(PyTensor { tensor: zeros });
53        }
54
55        // ✅ SciRS2 POLICY: Use scirs2_core::random for RNG
56        use scirs2_core::random::Distribution;
57        use scirs2_core::random::{thread_rng, Uniform};
58
59        let mut rng = thread_rng();
60        let dist = Uniform::new(0.0_f32, 1.0_f32).map_err(|e| {
61            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
62                "Failed to create uniform distribution: {}",
63                e
64            ))
65        })?;
66
67        let mut data = py_result!(input.tensor.data())?;
68        let scale = 1.0 / (1.0 - self.p);
69
70        for val in data.iter_mut() {
71            if dist.sample(&mut rng) < self.p {
72                *val = 0.0;
73            } else {
74                *val *= scale; // Scale to maintain expected value
75            }
76        }
77
78        let shape = input.tensor.shape().dims().to_vec();
79        let result = py_result!(torsh_tensor::Tensor::from_data(
80            data,
81            shape,
82            input.tensor.device()
83        ))?;
84
85        Ok(PyTensor { tensor: result })
86    }
87
88    /// Get layer parameters (Dropout has no parameters)
89    fn parameters(&self) -> PyResult<Vec<PyTensor>> {
90        Ok(Vec::new())
91    }
92
93    /// Get named parameters (Dropout has no parameters)
94    fn named_parameters(&self) -> PyResult<HashMap<String, PyTensor>> {
95        Ok(HashMap::new())
96    }
97
98    /// Set training mode
99    fn train(&mut self, mode: Option<bool>) -> PyResult<()> {
100        self.training = mode.unwrap_or(true);
101        Ok(())
102    }
103
104    /// Set evaluation mode
105    fn eval(&mut self) -> PyResult<()> {
106        self.training = false;
107        Ok(())
108    }
109
110    /// String representation
111    fn __repr__(&self) -> String {
112        format!("Dropout(p={}, inplace={})", self.p, self.inplace)
113    }
114}
115
/// 2D Dropout layer.
///
/// During training, zeroes whole channels (slices along dim 1) of an
/// at-least-2D input with probability `p`, scaling surviving channels by
/// `1/(1-p)` — see `forward`. Identity in eval mode.
#[pyclass(name = "Dropout2d", extends = PyModule)]
pub struct PyDropout2d {
    // Probability of a channel being zeroed; validated to lie in [0, 1].
    p: f32,
    // Requested in-place flag. NOTE(review): stored but not honored —
    // `forward` always returns a new tensor.
    inplace: bool,
    // True while in training mode; dropout is a no-op when false.
    training: bool,
}
123
124#[pymethods]
125impl PyDropout2d {
126    #[new]
127    fn new(p: Option<f32>, inplace: Option<bool>) -> PyResult<(Self, PyModule)> {
128        let p = p.unwrap_or(0.5);
129        let inplace = inplace.unwrap_or(false);
130
131        if !(0.0..=1.0).contains(&p) {
132            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
133                "dropout probability has to be between 0 and 1, but got {p}",
134            ));
135        }
136
137        Ok((
138            Self {
139                p,
140                inplace,
141                training: true,
142            },
143            PyModule::new(),
144        ))
145    }
146
147    /// Forward pass through 2D dropout
148    fn forward(&mut self, input: &PyTensor) -> PyResult<PyTensor> {
149        // ✅ Proper 2D dropout implementation - drops entire channels
150        if !self.training || self.p == 0.0 {
151            return Ok(PyTensor {
152                tensor: input.tensor.clone(),
153            });
154        }
155
156        if self.p == 1.0 {
157            let zeros = py_result!(torsh_tensor::creation::zeros_like(&input.tensor))?;
158            return Ok(PyTensor { tensor: zeros });
159        }
160
161        // ✅ SciRS2 POLICY: Use scirs2_core::random for RNG
162        use scirs2_core::random::Distribution;
163        use scirs2_core::random::{thread_rng, Uniform};
164
165        let shape = input.tensor.shape().dims().to_vec();
166        if shape.len() < 2 {
167            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
168                "Dropout2d expects at least 2D input",
169            ));
170        }
171
172        let mut rng = thread_rng();
173        let dist = Uniform::new(0.0_f32, 1.0_f32).map_err(|e| {
174            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
175                "Failed to create uniform distribution: {}",
176                e
177            ))
178        })?;
179
180        let batch_size = shape[0];
181        let channels = shape[1];
182        let spatial_size: usize = shape[2..].iter().product();
183
184        let mut data = py_result!(input.tensor.data())?;
185        let scale = 1.0 / (1.0 - self.p);
186
187        // Drop entire channels
188        for b in 0..batch_size {
189            for c in 0..channels {
190                if dist.sample(&mut rng) < self.p {
191                    // Drop entire channel
192                    let start = (b * channels + c) * spatial_size;
193                    let end = start + spatial_size;
194                    for val in &mut data[start..end] {
195                        *val = 0.0;
196                    }
197                } else {
198                    // Scale channel
199                    let start = (b * channels + c) * spatial_size;
200                    let end = start + spatial_size;
201                    for val in &mut data[start..end] {
202                        *val *= scale;
203                    }
204                }
205            }
206        }
207
208        let result = py_result!(torsh_tensor::Tensor::from_data(
209            data,
210            shape.to_vec(),
211            input.tensor.device()
212        ))?;
213
214        Ok(PyTensor { tensor: result })
215    }
216
217    /// Get layer parameters (Dropout2d has no parameters)
218    fn parameters(&self) -> PyResult<Vec<PyTensor>> {
219        Ok(Vec::new())
220    }
221
222    /// Get named parameters (Dropout2d has no parameters)
223    fn named_parameters(&self) -> PyResult<HashMap<String, PyTensor>> {
224        Ok(HashMap::new())
225    }
226
227    /// Set training mode
228    fn train(&mut self, mode: Option<bool>) -> PyResult<()> {
229        self.training = mode.unwrap_or(true);
230        Ok(())
231    }
232
233    /// Set evaluation mode
234    fn eval(&mut self) -> PyResult<()> {
235        self.training = false;
236        Ok(())
237    }
238
239    /// String representation
240    fn __repr__(&self) -> String {
241        format!("Dropout2d(p={}, inplace={})", self.p, self.inplace)
242    }
243}
244
/// 3D Dropout layer.
///
/// During training, zeroes whole channels (slices along dim 1) of an
/// at-least-3D input with probability `p`, scaling surviving channels by
/// `1/(1-p)` — see `forward`. Identity in eval mode.
#[pyclass(name = "Dropout3d", extends = PyModule)]
pub struct PyDropout3d {
    // Probability of a channel being zeroed; validated to lie in [0, 1].
    p: f32,
    // Requested in-place flag. NOTE(review): stored but not honored —
    // `forward` always returns a new tensor.
    inplace: bool,
    // True while in training mode; dropout is a no-op when false.
    training: bool,
}
252
253#[pymethods]
254impl PyDropout3d {
255    #[new]
256    fn new(p: Option<f32>, inplace: Option<bool>) -> PyResult<(Self, PyModule)> {
257        let p = p.unwrap_or(0.5);
258        let inplace = inplace.unwrap_or(false);
259
260        if !(0.0..=1.0).contains(&p) {
261            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
262                "dropout probability has to be between 0 and 1, but got {p}",
263            ));
264        }
265
266        Ok((
267            Self {
268                p,
269                inplace,
270                training: true,
271            },
272            PyModule::new(),
273        ))
274    }
275
276    /// Forward pass through 3D dropout
277    fn forward(&mut self, input: &PyTensor) -> PyResult<PyTensor> {
278        // ✅ Proper 3D dropout implementation - drops entire channels (same as 2D)
279        if !self.training || self.p == 0.0 {
280            return Ok(PyTensor {
281                tensor: input.tensor.clone(),
282            });
283        }
284
285        if self.p == 1.0 {
286            let zeros = py_result!(torsh_tensor::creation::zeros_like(&input.tensor))?;
287            return Ok(PyTensor { tensor: zeros });
288        }
289
290        // ✅ SciRS2 POLICY: Use scirs2_core::random for RNG
291        use scirs2_core::random::Distribution;
292        use scirs2_core::random::{thread_rng, Uniform};
293
294        let shape = input.tensor.shape().dims().to_vec();
295        if shape.len() < 3 {
296            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
297                "Dropout3d expects at least 3D input",
298            ));
299        }
300
301        let mut rng = thread_rng();
302        let dist = Uniform::new(0.0_f32, 1.0_f32).map_err(|e| {
303            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
304                "Failed to create uniform distribution: {}",
305                e
306            ))
307        })?;
308
309        let batch_size = shape[0];
310        let channels = shape[1];
311        let spatial_size: usize = shape[2..].iter().product();
312
313        let mut data = py_result!(input.tensor.data())?;
314        let scale = 1.0 / (1.0 - self.p);
315
316        // Drop entire channels
317        for b in 0..batch_size {
318            for c in 0..channels {
319                if dist.sample(&mut rng) < self.p {
320                    // Drop entire channel
321                    let start = (b * channels + c) * spatial_size;
322                    let end = start + spatial_size;
323                    for val in &mut data[start..end] {
324                        *val = 0.0;
325                    }
326                } else {
327                    // Scale channel
328                    let start = (b * channels + c) * spatial_size;
329                    let end = start + spatial_size;
330                    for val in &mut data[start..end] {
331                        *val *= scale;
332                    }
333                }
334            }
335        }
336
337        let result = py_result!(torsh_tensor::Tensor::from_data(
338            data,
339            shape.to_vec(),
340            input.tensor.device()
341        ))?;
342
343        Ok(PyTensor { tensor: result })
344    }
345
346    /// Get layer parameters (Dropout3d has no parameters)
347    fn parameters(&self) -> PyResult<Vec<PyTensor>> {
348        Ok(Vec::new())
349    }
350
351    /// Get named parameters (Dropout3d has no parameters)
352    fn named_parameters(&self) -> PyResult<HashMap<String, PyTensor>> {
353        Ok(HashMap::new())
354    }
355
356    /// Set training mode
357    fn train(&mut self, mode: Option<bool>) -> PyResult<()> {
358        self.training = mode.unwrap_or(true);
359        Ok(())
360    }
361
362    /// Set evaluation mode
363    fn eval(&mut self) -> PyResult<()> {
364        self.training = false;
365        Ok(())
366    }
367
368    /// String representation
369    fn __repr__(&self) -> String {
370        format!("Dropout3d(p={}, inplace={})", self.p, self.inplace)
371    }
372}
373
/// Alpha Dropout layer (for SELU activation).
///
/// Variant of dropout that keeps the self-normalizing property of SELU
/// networks: dropped elements are set to SELU's negative saturation value
/// and an affine transform is applied — see `forward`. Identity in eval
/// mode.
#[pyclass(name = "AlphaDropout", extends = PyModule)]
pub struct PyAlphaDropout {
    // Probability of an element being dropped; validated to lie in [0, 1].
    p: f32,
    // Requested in-place flag. NOTE(review): stored but not honored —
    // `forward` always returns a new tensor.
    inplace: bool,
    // True while in training mode; alpha dropout is a no-op when false.
    training: bool,
}
381
382#[pymethods]
383impl PyAlphaDropout {
384    #[new]
385    fn new(p: Option<f32>, inplace: Option<bool>) -> PyResult<(Self, PyModule)> {
386        let p = p.unwrap_or(0.5);
387        let inplace = inplace.unwrap_or(false);
388
389        if !(0.0..=1.0).contains(&p) {
390            return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
391                "dropout probability has to be between 0 and 1, but got {p}",
392            ));
393        }
394
395        Ok((
396            Self {
397                p,
398                inplace,
399                training: true,
400            },
401            PyModule::new(),
402        ))
403    }
404
405    /// Forward pass through alpha dropout
406    fn forward(&mut self, input: &PyTensor) -> PyResult<PyTensor> {
407        // ✅ Proper AlphaDropout implementation for SELU networks
408        if !self.training || self.p == 0.0 {
409            return Ok(PyTensor {
410                tensor: input.tensor.clone(),
411            });
412        }
413
414        if self.p == 1.0 {
415            // When p=1, all values are set to alpha'
416            let alpha_prime = -1.7580993408473766_f32;
417            let mut data = py_result!(input.tensor.data())?;
418            for val in data.iter_mut() {
419                *val = alpha_prime;
420            }
421            let shape = input.tensor.shape().dims().to_vec();
422            let result = py_result!(torsh_tensor::Tensor::from_data(
423                data,
424                shape,
425                input.tensor.device()
426            ))?;
427            return Ok(PyTensor { tensor: result });
428        }
429
430        // ✅ SciRS2 POLICY: Use scirs2_core::random for RNG
431        use scirs2_core::random::Distribution;
432        use scirs2_core::random::{thread_rng, Uniform};
433
434        // SELU constants
435        let _alpha = 1.6732632423543772_f32;
436        let alpha_prime = -1.7580993408473766_f32; // -alpha * lambda where lambda = 1.0507
437
438        // Calculate affine transformation parameters to maintain self-normalization
439        let a = ((1.0 - self.p) * (1.0 + self.p * alpha_prime * alpha_prime)).sqrt();
440        let b = -a * alpha_prime * self.p;
441
442        let mut rng = thread_rng();
443        let dist = Uniform::new(0.0_f32, 1.0_f32).map_err(|e| {
444            PyErr::new::<pyo3::exceptions::PyValueError, _>(format!(
445                "Failed to create uniform distribution: {}",
446                e
447            ))
448        })?;
449
450        let mut data = py_result!(input.tensor.data())?;
451
452        for val in data.iter_mut() {
453            if dist.sample(&mut rng) < self.p {
454                // Set to alpha' and apply affine transformation
455                *val = (*val * 0.0 + alpha_prime) * a + b;
456            } else {
457                // Keep value and apply affine transformation
458                *val = *val * a + b;
459            }
460        }
461
462        let shape = input.tensor.shape().dims().to_vec();
463        let result = py_result!(torsh_tensor::Tensor::from_data(
464            data,
465            shape,
466            input.tensor.device()
467        ))?;
468
469        Ok(PyTensor { tensor: result })
470    }
471
472    /// Get layer parameters (AlphaDropout has no parameters)
473    fn parameters(&self) -> PyResult<Vec<PyTensor>> {
474        Ok(Vec::new())
475    }
476
477    /// Get named parameters (AlphaDropout has no parameters)
478    fn named_parameters(&self) -> PyResult<HashMap<String, PyTensor>> {
479        Ok(HashMap::new())
480    }
481
482    /// Set training mode
483    fn train(&mut self, mode: Option<bool>) -> PyResult<()> {
484        self.training = mode.unwrap_or(true);
485        Ok(())
486    }
487
488    /// Set evaluation mode
489    fn eval(&mut self) -> PyResult<()> {
490        self.training = false;
491        Ok(())
492    }
493
494    /// String representation
495    fn __repr__(&self) -> String {
496        format!("AlphaDropout(p={}, inplace={})", self.p, self.inplace)
497    }
498}