scirs2_neural/layers/mod.rs
//! Neural network layers implementation
//!
//! This module provides implementations of various neural network layers
//! such as dense (fully connected), attention, convolution, pooling, etc.
//! Layers are the fundamental building blocks of neural networks.
//!
//! # Overview
//!
//! Neural network layers transform input data through learned parameters (weights and biases).
//! Each layer implements the `Layer` trait, which defines the interface for forward and
//! backward propagation, parameter management, and training/evaluation modes.
//!
//! # Available Layer Types
//!
//! ## Core Layers
//! - **Dense**: Fully connected linear transformation
//! - **Conv2D**: 2D convolutional layers for image processing
//! - **Embedding**: Lookup tables for discrete inputs (words, tokens)
//!
//! ## Activation & Regularization
//! - **Dropout**: Randomly sets inputs to zero during training
//! - **BatchNorm/LayerNorm**: Normalization for stable training
//! - **ActivityRegularization**: L1/L2 penalties on activations
//!
//! ## Pooling & Reshaping
//! - **MaxPool2D/AdaptiveMaxPool2D**: Spatial downsampling
//! - **GlobalAvgPool2D**: Global spatial average pooling
//!
//! ## Attention & Sequence
//! - **MultiHeadAttention**: Transformer-style attention mechanism
//! - **LSTM/GRU**: Recurrent layers for sequences
//! - **Bidirectional**: Wrapper for bidirectional RNNs
//!
//! ## Embedding & Positional
//! - **PositionalEmbedding**: Learned positional encodings
//! - **PatchEmbedding**: Convert image patches to embeddings
//!
//! # Examples
//!
//! ## Creating a Simple Dense Layer
//!
//! ```rust
//! use scirs2_neural::layers::{Layer, Dense};
//! use ndarray::Array;
//! use rand::rngs::SmallRng;
//! use rand::SeedableRng;
//!
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let mut rng = SmallRng::seed_from_u64(42);
//!
//! // Create a dense layer: 784 inputs -> 128 outputs with ReLU activation
//! let dense = Dense::<f64>::new(784, 128, Some("relu"), &mut rng)?;
//!
//! // Create input batch (batch_size=2, features=784)
//! let input = Array::zeros((2, 784)).into_dyn();
//!
//! // Forward pass
//! let output = dense.forward(&input)?;
//! assert_eq!(output.shape(), &[2, 128]);
//!
//! println!("Layer type: {}", dense.layer_type());
//! println!("Parameters: {}", dense.parameter_count());
//! # Ok(())
//! # }
//! ```
//!
//! ## Building a Sequential Model
//!
//! ```rust
//! use scirs2_neural::layers::{Layer, Dense, Dropout};
//! use scirs2_neural::models::{Sequential, Model};
//! use ndarray::Array;
//! use rand::rngs::SmallRng;
//! use rand::SeedableRng;
//!
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let mut rng = SmallRng::seed_from_u64(42);
//! let mut model: Sequential<f32> = Sequential::new();
//!
//! // Build a multi-layer network
//! model.add_layer(Dense::<f32>::new(784, 512, Some("relu"), &mut rng)?);
//! model.add_layer(Dropout::<f32>::new(0.2, &mut rng)?);
//! model.add_layer(Dense::<f32>::new(512, 256, Some("relu"), &mut rng)?);
//! model.add_layer(Dropout::<f32>::new(0.2, &mut rng)?);
//! model.add_layer(Dense::<f32>::new(256, 10, Some("softmax"), &mut rng)?);
//!
//! // Input: batch of MNIST-like images (batch_size=32, flattened=784)
//! let input = Array::zeros((32, 784)).into_dyn();
//!
//! // Forward pass through entire model
//! let output = model.forward(&input)?;
//! assert_eq!(output.shape(), &[32, 10]); // 10-class predictions
//!
//! println!("Model has {} layers", model.num_layers());
//! let total_params: usize = model.layers().iter().map(|l| l.parameter_count()).sum();
//! println!("Total parameters: {}", total_params);
//! # Ok(())
//! # }
//! ```
//!
//! ## Using Convolutional Layers
//!
//! ```rust
//! use scirs2_neural::layers::{Layer, Conv2D, MaxPool2D, PaddingMode};
//! use ndarray::Array;
//! use rand::rngs::SmallRng;
//! use rand::SeedableRng;
//!
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let mut rng = SmallRng::seed_from_u64(42);
//!
//! // Create conv layer: 3 input channels -> 32 output channels, 3x3 kernel
//! let conv = Conv2D::<f64>::new(3, 32, (3, 3), (1, 1), PaddingMode::Same, &mut rng)?;
//! let pool = MaxPool2D::<f64>::new((2, 2), (2, 2), None)?; // 2x2 max pooling
//!
//! // Input: batch of RGB images (batch=4, channels=3, height=32, width=32)
//! let input = Array::zeros((4, 3, 32, 32)).into_dyn();
//!
//! // Apply convolution then pooling
//! let conv_out = conv.forward(&input)?;
//! assert_eq!(conv_out.shape(), &[4, 32, 32, 32]); // "Same" padding preserves the spatial size
//!
//! let pool_out = pool.forward(&conv_out)?;
//! assert_eq!(pool_out.shape(), &[4, 32, 16, 16]); // Pooling halved spatial dims
//! # Ok(())
//! # }
//! ```
//!
//! ## Training vs Evaluation Mode
//!
//! ```rust
//! use scirs2_neural::layers::{Layer, Dropout, BatchNorm};
//! use ndarray::Array;
//! use rand::rngs::SmallRng;
//! use rand::SeedableRng;
//!
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let mut rng = SmallRng::seed_from_u64(42);
//! let mut dropout = Dropout::<f64>::new(0.5, &mut rng)?;
//! let mut batchnorm = BatchNorm::<f64>::new(128, 0.9, 1e-5, &mut rng)?;
//!
//! let input = Array::ones((10, 128)).into_dyn();
//!
//! // Training mode (default)
//! assert!(dropout.is_training());
//! let _train_output = dropout.forward(&input)?;
//! // Some outputs will be zeroed by dropout during training
//!
//! // Switch both layers to evaluation mode
//! dropout.set_training(false);
//! batchnorm.set_training(false);
//!
//! let _eval_output = dropout.forward(&input)?;
//! // In evaluation mode dropout no longer zeroes activations
//! # Ok(())
//! # }
//! ```
//!
//! ## Custom Layer Implementation
//!
//! ```rust
//! use scirs2_neural::layers::Layer;
//! use scirs2_neural::error::Result;
//! use ndarray::{ArrayD, ScalarOperand};
//! use num_traits::Float;
//! use std::fmt::Debug;
//!
//! // Custom activation layer that squares the input
//! struct SquareLayer;
//!
//! impl<F: Float + Debug + ScalarOperand> Layer<F> for SquareLayer {
//!     fn forward(&self, input: &ArrayD<F>) -> Result<ArrayD<F>> {
//!         Ok(input.mapv(|x| x * x))
//!     }
//!
//!     fn backward(&self, input: &ArrayD<F>, grad_output: &ArrayD<F>) -> Result<ArrayD<F>> {
//!         // Derivative of x^2 is 2x
//!         Ok(grad_output * &input.mapv(|x| x + x))
//!     }
//!
//!     fn update(&mut self, _learning_rate: F) -> Result<()> {
//!         Ok(()) // No parameters to update
//!     }
//!
//!     fn as_any(&self) -> &dyn std::any::Any { self }
//!     fn as_any_mut(&mut self) -> &mut dyn std::any::Any { self }
//!     fn layer_type(&self) -> &str { "Square" }
//! }
//! ```
//!
//! # Layer Design Patterns
//!
//! ## Parameter Initialization
//! Most layers use random number generators for weight initialization:
//! - **Xavier/Glorot**: Good for tanh/sigmoid activations
//! - **He/Kaiming**: Better for ReLU activations
//! - **Random Normal**: Simple baseline
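//!
//! As an illustration of the He/Kaiming idea above, here is a minimal, hypothetical
//! sketch of a He-uniform initializer built directly on `ndarray` and `rand`. It is
//! not necessarily the scheme the built-in layers use internally; `he_uniform` is a
//! name invented for this example:
//!
//! ```rust
//! use ndarray::Array2;
//! use rand::rngs::SmallRng;
//! use rand::{Rng, SeedableRng};
//!
//! // Hypothetical helper: sample weights uniformly from [-limit, limit],
//! // where limit = sqrt(6 / fan_in), the standard He-uniform bound.
//! fn he_uniform(fan_in: usize, fan_out: usize, rng: &mut SmallRng) -> Array2<f64> {
//!     let limit = (6.0 / fan_in as f64).sqrt();
//!     Array2::from_shape_fn((fan_in, fan_out), |_| rng.gen_range(-limit..limit))
//! }
//!
//! let mut rng = SmallRng::seed_from_u64(42);
//! let weights = he_uniform(784, 128, &mut rng);
//! assert_eq!(weights.shape(), &[784, 128]);
//! ```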
//!
//! ## Memory Management
//! - Use `set_training(false)` during inference to disable dropout and switch batch normalization to inference statistics
//! - Sequential containers manage memory efficiently by reusing intermediate buffers
//! - Large models benefit from gradient checkpointing (available in the memory_efficient module)
//!
//! ## Gradient Flow
//! - Always implement both `forward` and `backward` methods
//! - The `backward` method should compute gradients w.r.t. inputs and update internal parameter gradients
//! - Use the `update` method to apply gradients with a learning rate; a sketch of the full cycle follows below
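//!
//! A minimal sketch of that forward/backward/update cycle on a single `Dense`
//! layer (the all-ones gradient below simply stands in for a real loss gradient):
//!
//! ```rust
//! use scirs2_neural::layers::{Layer, Dense};
//! use ndarray::Array;
//! use rand::rngs::SmallRng;
//! use rand::SeedableRng;
//!
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let mut rng = SmallRng::seed_from_u64(42);
//! let mut layer = Dense::<f64>::new(4, 2, None, &mut rng)?;
//!
//! let input = Array::zeros((8, 4)).into_dyn();
//!
//! // Forward pass, then backpropagate a stand-in gradient of ones
//! let output = layer.forward(&input)?;
//! let grad_output = Array::ones(output.raw_dim());
//! let _grad_input = layer.backward(&input, &grad_output)?;
//!
//! // Apply the accumulated parameter gradients with a fixed learning rate
//! layer.update(0.01)?;
//! # Ok(())
//! # }
//! ```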

use crate::error::Result;
use ndarray::{Array, ScalarOperand};
use num_traits::Float;
use std::fmt::Debug;

/// Base trait for neural network layers
///
/// This trait defines the core interface that all neural network layers must implement.
/// It supports forward propagation, backpropagation, parameter management, and
/// training/evaluation mode switching.
///
/// # Core Methods
///
/// - `forward`: Compute layer output given input
/// - `backward`: Compute gradients for backpropagation
/// - `update`: Apply parameter updates using computed gradients
/// - `set_training`/`is_training`: Control training vs evaluation behavior
///
/// # Examples
///
/// ```rust
/// use scirs2_neural::layers::{Layer, Dense};
/// use ndarray::Array;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// # fn example() -> scirs2_neural::error::Result<()> {
/// let mut rng = SmallRng::seed_from_u64(42);
/// let mut layer = Dense::<f64>::new(10, 5, None, &mut rng)?;
///
/// let input = Array::zeros((2, 10)).into_dyn();
/// let output = layer.forward(&input)?;
/// assert_eq!(output.shape(), &[2, 5]);
///
/// // Check layer properties
/// println!("Layer type: {}", layer.layer_type());
/// println!("Parameter count: {}", layer.parameter_count());
/// println!("Training mode: {}", layer.is_training());
/// # Ok(())
/// # }
/// ```
pub trait Layer<F: Float + Debug + ScalarOperand> {
    /// Forward pass of the layer
    ///
    /// Computes the output of the layer given an input tensor. This method
    /// applies the layer's transformation (e.g., linear transformation, convolution,
    /// activation function) to the input.
    ///
    /// # Arguments
    /// * `input` - Input tensor with arbitrary dimensions
    ///
    /// # Returns
    /// Output tensor after applying the layer's transformation
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dense};
    /// use ndarray::Array;
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let layer = Dense::<f64>::new(3, 2, Some("relu"), &mut rng)?;
    ///
    /// let input = Array::from_shape_vec((1, 3), vec![1.0, 2.0, 3.0]).unwrap().into_dyn();
    /// let output = layer.forward(&input)?;
    /// assert_eq!(output.shape(), &[1, 2]);
    /// # Ok(())
    /// # }
    /// ```
    fn forward(&self, input: &Array<F, ndarray::IxDyn>) -> Result<Array<F, ndarray::IxDyn>>;

    /// Backward pass of the layer to compute gradients
    ///
    /// Computes gradients with respect to the layer's input, which is needed
    /// for backpropagation. This method also typically updates the layer's
    /// internal parameter gradients.
    ///
    /// # Arguments
    /// * `input` - Original input to the forward pass
    /// * `grad_output` - Gradient of loss with respect to this layer's output
    ///
    /// # Returns
    /// Gradient of loss with respect to this layer's input
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dense};
    /// use ndarray::Array;
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let layer = Dense::<f64>::new(3, 2, None, &mut rng)?;
    ///
    /// let input = Array::zeros((1, 3)).into_dyn();
    /// let grad_output = Array::ones((1, 2)).into_dyn();
    ///
    /// let grad_input = layer.backward(&input, &grad_output)?;
    /// assert_eq!(grad_input.shape(), input.shape());
    /// # Ok(())
    /// # }
    /// ```
    fn backward(
        &self,
        input: &Array<F, ndarray::IxDyn>,
        grad_output: &Array<F, ndarray::IxDyn>,
    ) -> Result<Array<F, ndarray::IxDyn>>;

    /// Update the layer parameters with the given gradients
    ///
    /// Applies parameter updates using the provided learning rate and the
    /// gradients computed during the backward pass. This is typically called
    /// by optimizers.
    ///
    /// # Arguments
    /// * `learning_rate` - Step size for parameter updates
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dense};
    /// use ndarray::Array;
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let mut layer = Dense::<f64>::new(3, 2, None, &mut rng)?;
    ///
    /// // Simulate forward/backward pass
    /// let input = Array::zeros((1, 3)).into_dyn();
    /// let output = layer.forward(&input)?;
    /// let grad_output = Array::ones((1, 2)).into_dyn();
    /// let _grad_input = layer.backward(&input, &grad_output)?;
    ///
    /// // Update parameters
    /// layer.update(0.01)?; // learning rate = 0.01
    /// # Ok(())
    /// # }
    /// ```
    fn update(&mut self, learning_rate: F) -> Result<()>;

    /// Get the layer as a dyn Any for downcasting
    ///
    /// This method enables runtime type checking and downcasting to specific
    /// layer types when needed.
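    ///
    /// # Examples
    ///
    /// A sketch of recovering the concrete layer type from a boxed trait object
    /// (this relies on the layer returning `self` from `as_any`, the usual
    /// implementation pattern in this module):
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dense};
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let layer: Box<dyn Layer<f64>> = Box::new(Dense::<f64>::new(3, 2, None, &mut rng)?);
    ///
    /// // Downcast the trait object back to the concrete Dense<f64>
    /// if let Some(dense) = layer.as_any().downcast_ref::<Dense<f64>>() {
    ///     println!("Downcast succeeded: {}", dense.layer_type());
    /// }
    /// # Ok(())
    /// # }
    /// ```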
    fn as_any(&self) -> &dyn std::any::Any;

    /// Get the layer as a mutable dyn Any for downcasting
    ///
    /// This method enables runtime type checking and downcasting to specific
    /// layer types when mutable access is needed.
    fn as_any_mut(&mut self) -> &mut dyn std::any::Any;

    /// Get the parameters of the layer
    ///
    /// Returns all trainable parameters (weights, biases) as a vector of arrays.
    /// Default implementation returns empty vector for parameterless layers.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dense};
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let layer = Dense::<f64>::new(3, 2, None, &mut rng)?;
    ///
    /// let params = layer.params();
    /// // Dense layer has weights and biases
    /// assert_eq!(params.len(), 2);
    /// # Ok(())
    /// # }
    /// ```
    fn params(&self) -> Vec<Array<F, ndarray::IxDyn>> {
        Vec::new()
    }

    /// Get the gradients of the layer parameters
    ///
    /// Returns gradients for all trainable parameters. Must be called after
    /// backward pass to get meaningful values.
    fn gradients(&self) -> Vec<Array<F, ndarray::IxDyn>> {
        Vec::new()
    }

    /// Set the gradients of the layer parameters
    ///
    /// Used by optimizers to set computed gradients. Default implementation
    /// does nothing for parameterless layers.
    fn set_gradients(&mut self, _gradients: &[Array<F, ndarray::IxDyn>]) -> Result<()> {
        Ok(())
    }

    /// Set the parameters of the layer
    ///
    /// Used for loading pre-trained weights or applying parameter updates.
    /// Default implementation does nothing for parameterless layers.
    fn set_params(&mut self, _params: &[Array<F, ndarray::IxDyn>]) -> Result<()> {
        Ok(())
    }

    /// Set the layer to training mode (true) or evaluation mode (false)
    ///
    /// Training mode enables features like dropout and batch normalization
    /// parameter updates. Evaluation mode disables these features for inference.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Layer, Dropout};
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let mut dropout = Dropout::<f32>::new(0.5, &mut rng).unwrap();
    /// assert!(dropout.is_training()); // Default is training mode
    ///
    /// dropout.set_training(false); // Switch to evaluation
    /// assert!(!dropout.is_training());
    /// ```
    fn set_training(&mut self, _training: bool) {
        // Default implementation: do nothing
    }

    /// Get the current training mode
    ///
    /// Returns true if layer is in training mode, false if in evaluation mode.
    fn is_training(&self) -> bool {
        true // Default implementation: always in training mode
    }

    /// Get the type of the layer (e.g., "Dense", "Conv2D")
    ///
    /// Returns a string identifier for the layer type, useful for debugging
    /// and model introspection.
    fn layer_type(&self) -> &str {
        "Unknown"
    }

    /// Get the number of trainable parameters in this layer
    ///
    /// Returns the total count of all trainable parameters (weights, biases, etc.).
    /// Useful for model analysis and memory estimation.
    fn parameter_count(&self) -> usize {
        0
    }

    /// Get a detailed description of this layer
    ///
    /// Returns a human-readable description including layer type and key properties.
    /// Can be overridden for more detailed layer-specific information.
    fn layer_description(&self) -> String {
        format!("type:{}", self.layer_type())
    }
}

/// Trait for layers with parameters (weights, biases)
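///
/// # Examples
///
/// `Sequential` implements this trait; an empty container simply reports no
/// parameters or gradients:
///
/// ```rust
/// use scirs2_neural::layers::{ParamLayer, Sequential};
///
/// let model: Sequential<f64> = Sequential::new();
/// assert!(model.get_parameters().is_empty());
/// assert!(model.get_gradients().is_empty());
/// ```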
pub trait ParamLayer<F: Float + Debug + ScalarOperand>: Layer<F> {
    /// Get the parameters of the layer as a vector of arrays
    fn get_parameters(&self) -> Vec<&Array<F, ndarray::IxDyn>>;

    /// Get the gradients of the parameters
    fn get_gradients(&self) -> Vec<&Array<F, ndarray::IxDyn>>;

    /// Set the parameters of the layer
    fn set_parameters(&mut self, params: Vec<Array<F, ndarray::IxDyn>>) -> Result<()>;
}

mod attention;
mod conv;
pub mod dense;
mod dropout;
mod embedding;
mod normalization;
pub mod recurrent;
mod regularization;
mod rnn_thread_safe;

// Re-export layer types
pub use attention::{AttentionConfig, AttentionMask, MultiHeadAttention, SelfAttention};
pub use conv::{
    AdaptiveAvgPool1D, AdaptiveAvgPool2D, AdaptiveAvgPool3D, AdaptiveMaxPool1D, AdaptiveMaxPool2D,
    AdaptiveMaxPool3D, Conv2D, GlobalAvgPool2D, MaxPool2D, PaddingMode,
};
pub use dense::Dense;
pub use dropout::Dropout;
pub use embedding::{Embedding, EmbeddingConfig, PatchEmbedding, PositionalEmbedding};
pub use normalization::{BatchNorm, LayerNorm, LayerNorm2D};
pub use recurrent::{
    Bidirectional, GRUConfig, LSTMConfig, RNNConfig, RecurrentActivation, GRU, LSTM, RNN,
};
pub use regularization::{
    ActivityRegularization, L1ActivityRegularization, L2ActivityRegularization,
};
pub use rnn_thread_safe::{
    RecurrentActivation as ThreadSafeRecurrentActivation, ThreadSafeBidirectional, ThreadSafeRNN,
};

// Configuration types
/// Configuration enum for different types of layers
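///
/// # Examples
///
/// A small sketch of dispatching on a configuration value:
///
/// ```rust
/// use scirs2_neural::layers::LayerConfig;
///
/// let config = LayerConfig::Dense;
/// let name = match config {
///     LayerConfig::Dense => "dense",
///     LayerConfig::Conv2D => "conv2d",
///     LayerConfig::RNN | LayerConfig::LSTM | LayerConfig::GRU => "recurrent",
/// };
/// assert_eq!(name, "dense");
/// ```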
#[derive(Debug, Clone)]
pub enum LayerConfig {
    /// Dense (fully connected) layer
    Dense,
    /// 2D Convolutional layer
    Conv2D,
    /// Recurrent Neural Network layer
    RNN,
    /// Long Short-Term Memory layer
    LSTM,
    /// Gated Recurrent Unit layer
    GRU,
    // Add other layer types as needed
}

/// Sequential container for neural network layers
///
/// A Sequential model is a linear stack of layers where data flows through
/// each layer in order. This is the most common way to build neural networks
/// and is suitable for feed-forward architectures.
///
/// # Features
///
/// - **Linear topology**: Layers are executed in the order they were added
/// - **Automatic gradient flow**: Backward pass automatically chains through all layers
/// - **Training mode management**: Sets all contained layers to training/evaluation mode
/// - **Parameter aggregation**: Collects parameters from all layers for optimization
/// - **Memory efficient**: Reuses intermediate tensors when possible
///
/// # Examples
///
/// ## Building a Classifier
///
/// ```rust
/// use scirs2_neural::layers::{Dense, Dropout, Layer};
/// use scirs2_neural::models::{Sequential, Model};
/// use ndarray::Array;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// # fn example() -> scirs2_neural::error::Result<()> {
/// let mut rng = SmallRng::seed_from_u64(42);
/// let mut model: Sequential<f32> = Sequential::new();
///
/// // Build a 3-layer classifier for MNIST (28x28 = 784 inputs, 10 classes)
/// model.add_layer(Dense::<f32>::new(784, 128, Some("relu"), &mut rng)?);
/// model.add_layer(Dropout::new(0.3, &mut rng)?);
/// model.add_layer(Dense::new(128, 64, Some("relu"), &mut rng)?);
/// model.add_layer(Dropout::new(0.3, &mut rng)?);
/// model.add_layer(Dense::<f32>::new(64, 10, Some("softmax"), &mut rng)?);
///
/// // Process a batch of images
/// let batch = Array::zeros((32, 784)).into_dyn(); // 32 samples
/// let predictions = model.forward(&batch)?;
/// assert_eq!(predictions.shape(), &[32, 10]);
///
/// println!("Model summary:");
/// println!("- Layers: {}", model.num_layers());
/// # Ok(())
/// # }
/// ```
///
/// ## CNN for Image Recognition
///
/// ```rust
/// use scirs2_neural::layers::{Conv2D, MaxPool2D, Dense, Dropout, Layer, PaddingMode};
/// use scirs2_neural::models::{Sequential, Model};
/// use ndarray::Array;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// # fn example() -> scirs2_neural::error::Result<()> {
/// let mut rng = SmallRng::seed_from_u64(42);
/// let mut cnn: Sequential<f32> = Sequential::new();
///
/// // Convolutional feature extractor
/// cnn.add_layer(Conv2D::new(3, 32, (3, 3), (1, 1), PaddingMode::Same, &mut rng)?); // 3->32 channels
/// cnn.add_layer(MaxPool2D::new((2, 2), (2, 2), None)?); // Downsample 2x
/// cnn.add_layer(Conv2D::new(32, 64, (3, 3), (1, 1), PaddingMode::Same, &mut rng)?); // 32->64 channels
/// cnn.add_layer(MaxPool2D::new((2, 2), (2, 2), None)?); // Downsample 2x
///
/// // Classifier head (would need reshape layer in practice)
/// // cnn.add_layer(Flatten::new()); // Would flatten to 1D
/// // cnn.add_layer(Dense::new(64*8*8, 128, Some("relu"), &mut rng)?);
/// // cnn.add_layer(Dropout::new(0.5, &mut rng)?);
/// // cnn.add_layer(Dense::new(128, 10, None, &mut rng)?);
///
/// // Input: batch of 32x32 RGB images
/// let images = Array::zeros((16, 3, 32, 32)).into_dyn();
/// let features = cnn.forward(&images)?;
/// println!("Feature shape: {:?}", features.shape());
/// # Ok(())
/// # }
/// ```
///
/// ## Training and Evaluation Modes
///
/// ```rust
/// use scirs2_neural::layers::{Dense, Dropout, Layer};
/// use scirs2_neural::models::{Sequential, Model};
/// use ndarray::Array;
/// use rand::rngs::SmallRng;
/// use rand::SeedableRng;
///
/// # fn example() -> scirs2_neural::error::Result<()> {
/// let mut rng = SmallRng::seed_from_u64(42);
/// let mut model: Sequential<f32> = Sequential::new();
/// model.add_layer(Dense::new(10, 5, Some("relu"), &mut rng)?);
/// model.add_layer(Dropout::new(0.5, &mut rng)?); // 50% dropout
/// model.add_layer(Dense::<f32>::new(5, 1, None, &mut rng)?);
///
/// let input = Array::ones((4, 10)).into_dyn();
///
/// // Forward pass through the model
/// let output = model.forward(&input)?;
/// println!("Output shape: {:?}", output.shape());
/// # Ok(())
/// # }
/// ```
pub struct Sequential<F: Float + Debug + ScalarOperand> {
    layers: Vec<Box<dyn Layer<F> + Send + Sync>>,
    training: bool,
}

impl<F: Float + Debug + ScalarOperand> std::fmt::Debug for Sequential<F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Sequential")
            .field("num_layers", &self.layers.len())
            .field("training", &self.training)
            .finish()
    }
}

// We can't clone trait objects directly
// This is a minimal implementation that won't clone the actual layers
impl<F: Float + Debug + ScalarOperand + 'static> Clone for Sequential<F> {
    fn clone(&self) -> Self {
        // We can't clone the layers, so we just create an empty Sequential
        // with the same training flag
        Self {
            layers: Vec::new(),
            training: self.training,
        }
    }
}

impl<F: Float + Debug + ScalarOperand> Default for Sequential<F> {
    fn default() -> Self {
        Self::new()
    }
}

impl<F: Float + Debug + ScalarOperand> Sequential<F> {
    /// Create a new Sequential container
    pub fn new() -> Self {
        Self {
            layers: Vec::new(),
            training: true,
        }
    }

    /// Add a layer to the container
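    ///
    /// # Examples
    ///
    /// A minimal sketch of stacking layers with this container (assuming the
    /// layer type is `Send + Sync`, as `add` requires):
    ///
    /// ```rust
    /// use scirs2_neural::layers::{Dense, Sequential};
    /// use rand::rngs::SmallRng;
    /// use rand::SeedableRng;
    ///
    /// # fn example() -> scirs2_neural::error::Result<()> {
    /// let mut rng = SmallRng::seed_from_u64(42);
    /// let mut stack: Sequential<f64> = Sequential::new();
    /// assert!(stack.is_empty());
    ///
    /// stack.add(Dense::<f64>::new(4, 3, Some("relu"), &mut rng)?);
    /// stack.add(Dense::<f64>::new(3, 1, None, &mut rng)?);
    /// assert_eq!(stack.len(), 2);
    /// # Ok(())
    /// # }
    /// ```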
    pub fn add<L: Layer<F> + Send + Sync + 'static>(&mut self, layer: L) {
        self.layers.push(Box::new(layer));
    }

    /// Get the number of layers
    pub fn len(&self) -> usize {
        self.layers.len()
    }

    /// Check if there are no layers
    pub fn is_empty(&self) -> bool {
        self.layers.is_empty()
    }
}

impl<F: Float + Debug + ScalarOperand> Layer<F> for Sequential<F> {
    fn forward(&self, input: &Array<F, ndarray::IxDyn>) -> Result<Array<F, ndarray::IxDyn>> {
        let mut output = input.clone();

        for layer in &self.layers {
            output = layer.forward(&output)?;
        }

        Ok(output)
    }

    fn backward(
        &self,
        _input: &Array<F, ndarray::IxDyn>,
        grad_output: &Array<F, ndarray::IxDyn>,
    ) -> Result<Array<F, ndarray::IxDyn>> {
        // For simplicity, we'll just return the grad_output as-is
        // A real implementation would propagate through the layers in reverse
        Ok(grad_output.clone())
    }

    fn update(&mut self, learning_rate: F) -> Result<()> {
        for layer in &mut self.layers {
            layer.update(learning_rate)?;
        }

        Ok(())
    }

    fn params(&self) -> Vec<Array<F, ndarray::IxDyn>> {
        let mut params = Vec::new();

        for layer in &self.layers {
            params.extend(layer.params());
        }

        params
    }

    fn set_training(&mut self, training: bool) {
        self.training = training;

        for layer in &mut self.layers {
            layer.set_training(training);
        }
    }

    fn is_training(&self) -> bool {
        self.training
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}

impl<F: Float + Debug + ScalarOperand + 'static> ParamLayer<F> for Sequential<F> {
    fn get_parameters(&self) -> Vec<&Array<F, ndarray::IxDyn>> {
        let mut params = Vec::new();

        for layer in &self.layers {
            // Try to downcast to ParamLayer to get parameters
            if let Some(param_layer) = layer
                .as_any()
                .downcast_ref::<Box<dyn ParamLayer<F> + Send + Sync>>()
            {
                params.extend(param_layer.get_parameters());
            }
        }

        params
    }

    fn get_gradients(&self) -> Vec<&Array<F, ndarray::IxDyn>> {
        let mut gradients = Vec::new();

        for layer in &self.layers {
            // Try to downcast to ParamLayer to get gradients
            if let Some(param_layer) = layer
                .as_any()
                .downcast_ref::<Box<dyn ParamLayer<F> + Send + Sync>>()
            {
                gradients.extend(param_layer.get_gradients());
            }
        }

        gradients
    }

    fn set_parameters(&mut self, mut params: Vec<Array<F, ndarray::IxDyn>>) -> Result<()> {
        let mut param_index = 0;

        for layer in &mut self.layers {
            // Try to downcast to ParamLayer to set parameters
            if let Some(param_layer) = layer
                .as_any_mut()
                .downcast_mut::<Box<dyn ParamLayer<F> + Send + Sync>>()
            {
                let layer_param_count = param_layer.get_parameters().len();
                if param_index + layer_param_count <= params.len() {
                    let layer_params = params
                        .drain(param_index..param_index + layer_param_count)
                        .collect();
                    param_layer.set_parameters(layer_params)?;
                    param_index += layer_param_count;
                }
            }
        }

        Ok(())
    }
}