scirs2-neural 0.2.0

//! ResNet implementation
//!
//! ResNet (Residual Network) is a popular CNN architecture that introduced
//! skip connections to allow for training very deep networks.
//! Reference: "Deep Residual Learning for Image Recognition", He et al. (2015)
//! <https://arxiv.org/abs/1512.03385>

use crate::error::{NeuralError, Result};
use crate::layers::conv::PaddingMode;
use crate::layers::{BatchNorm, Conv2D, Dense, Dropout, Layer};
use scirs2_core::ndarray::{Array, IxDyn, ScalarOperand};
use scirs2_core::numeric::{Float, NumAssign};
use scirs2_core::random::SeedableRng;
use std::fmt::Debug;

/// ResNet block configuration
#[derive(Debug, Clone)]
pub enum ResNetBlock {
    /// Basic block (2 conv layers)
    Basic,
    /// Bottleneck block (3 conv layers with bottleneck)
    Bottleneck,
}

/// Configuration for a ResNet layer
#[derive(Debug, Clone)]
pub struct ResNetLayer {
    /// Number of blocks in this layer
    pub blocks: usize,
    /// Number of output channels
    pub channels: usize,
    /// Stride for the first block (usually 1 or 2)
    pub stride: usize,
}

/// Configuration for a ResNet model
#[derive(Debug, Clone)]
pub struct ResNetConfig {
    /// Block type (Basic or Bottleneck)
    pub block: ResNetBlock,
    /// Layer configuration
    pub layers: Vec<ResNetLayer>,
    /// Number of input channels (e.g., 3 for RGB)
    pub input_channels: usize,
    /// Number of output classes
    pub num_classes: usize,
    /// Dropout rate (0 to disable)
    pub dropout_rate: f64,
}

impl ResNetConfig {
    /// Create a ResNet-18 configuration
    pub fn resnet18(input_channels: usize, num_classes: usize) -> Self {
        Self {
            block: ResNetBlock::Basic,
            layers: vec![
                ResNetLayer {
                    blocks: 2,
                    channels: 64,
                    stride: 1,
                },
                ResNetLayer {
                    blocks: 2,
                    channels: 128,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 2,
                    channels: 256,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 2,
                    channels: 512,
                    stride: 2,
                },
            ],
            input_channels,
            num_classes,
            dropout_rate: 0.0,
        }
    }

    /// Create a ResNet-34 configuration
    pub fn resnet34(input_channels: usize, num_classes: usize) -> Self {
        Self {
            block: ResNetBlock::Basic,
            layers: vec![
                ResNetLayer {
                    blocks: 3,
                    channels: 64,
                    stride: 1,
                },
                ResNetLayer {
                    blocks: 4,
                    channels: 128,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 6,
                    channels: 256,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 3,
                    channels: 512,
                    stride: 2,
                },
            ],
            input_channels,
            num_classes,
            dropout_rate: 0.0,
        }
    }

    /// Create a ResNet-50 configuration
    pub fn resnet50(input_channels: usize, num_classes: usize) -> Self {
        Self {
            block: ResNetBlock::Bottleneck,
            layers: vec![
                ResNetLayer {
                    blocks: 3,
                    channels: 64,
                    stride: 1,
                },
                ResNetLayer {
                    blocks: 4,
                    channels: 128,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 6,
                    channels: 256,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 3,
                    channels: 512,
                    stride: 2,
                },
            ],
            input_channels,
            num_classes,
            dropout_rate: 0.0,
        }
    }

    /// Create a ResNet-101 configuration
    pub fn resnet101(input_channels: usize, num_classes: usize) -> Self {
        Self {
            block: ResNetBlock::Bottleneck,
            layers: vec![
                ResNetLayer {
                    blocks: 3,
                    channels: 64,
                    stride: 1,
                },
                ResNetLayer {
                    blocks: 4,
                    channels: 128,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 23,
                    channels: 256,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 3,
                    channels: 512,
                    stride: 2,
                },
            ],
            input_channels,
            num_classes,
            dropout_rate: 0.0,
        }
    }

    /// Create a ResNet-152 configuration
    pub fn resnet152(input_channels: usize, num_classes: usize) -> Self {
        Self {
            block: ResNetBlock::Bottleneck,
            layers: vec![
                ResNetLayer {
                    blocks: 3,
                    channels: 64,
                    stride: 1,
                },
                ResNetLayer {
                    blocks: 8,
                    channels: 128,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 36,
                    channels: 256,
                    stride: 2,
                },
                ResNetLayer {
                    blocks: 3,
                    channels: 512,
                    stride: 2,
                },
            ],
            input_channels,
            num_classes,
            dropout_rate: 0.0,
        }
    }

    /// Set dropout rate
    pub fn with_dropout(mut self, rate: f64) -> Self {
        self.dropout_rate = rate;
        self
    }
}

/// Basic block for ResNet (2 conv layers)
struct BasicBlock<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> {
    /// First convolutional layer
    conv1: Conv2D<F>,
    /// First batch normalization layer
    bn1: BatchNorm<F>,
    /// Second convolutional layer
    conv2: Conv2D<F>,
    /// Second batch normalization layer
    bn2: BatchNorm<F>,
    /// Skip connection downsample (optional)
    downsample: Option<(Conv2D<F>, BatchNorm<F>)>,
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Clone for BasicBlock<F> {
    fn clone(&self) -> Self {
        Self {
            conv1: self.conv1.clone(),
            bn1: self.bn1.clone(),
            conv2: self.conv2.clone(),
            bn2: self.bn2.clone(),
            downsample: self.downsample.clone(),
        }
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> BasicBlock<F> {
    /// Create a new basic block
    pub fn new(
        in_channels: usize,
        out_channels: usize,
        stride: usize,
        downsample: bool,
    ) -> Result<Self> {
        let stride_tuple = (stride, stride);

        let mut rng1 = scirs2_core::random::rngs::SmallRng::from_seed([42; 32]);
        let conv1 = Conv2D::new(in_channels, out_channels, (3, 3), stride_tuple, None)?
            .with_padding(PaddingMode::Same);

        let mut rng2 = scirs2_core::random::rngs::SmallRng::from_seed([43; 32]);
        let bn1 = BatchNorm::new(out_channels, 1e-5, 0.1, &mut rng2)?;

        let mut rng3 = scirs2_core::random::rngs::SmallRng::from_seed([44; 32]);
        let conv2 = Conv2D::new(out_channels, out_channels, (3, 3), (1, 1), None)?
            .with_padding(PaddingMode::Same);

        let mut rng4 = scirs2_core::random::rngs::SmallRng::from_seed([45; 32]);
        let bn2 = BatchNorm::new(out_channels, 1e-5, 0.1, &mut rng4)?;

        let downsample = if downsample {
            let mut rng5 = scirs2_core::random::rngs::SmallRng::from_seed([46; 32]);
            let ds_conv = Conv2D::new(in_channels, out_channels, (1, 1), stride_tuple, None)?
                .with_padding(PaddingMode::Valid);

            let mut rng6 = scirs2_core::random::rngs::SmallRng::from_seed([47; 32]);
            let ds_bn = BatchNorm::new(out_channels, 1e-5, 0.1, &mut rng6)?;
            Some((ds_conv, ds_bn))
        } else {
            None
        };

        Ok(Self {
            conv1,
            bn1,
            conv2,
            bn2,
            downsample,
        })
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Layer<F>
    for BasicBlock<F>
{
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        // First conv block
        let mut x = self.conv1.forward(input)?;
        x = self.bn1.forward(&x)?;
        x = x.mapv(|v: F| v.max(F::zero())); // ReLU

        // Second conv block
        x = self.conv2.forward(&x)?;
        x = self.bn2.forward(&x)?;

        // Skip connection
        let identity = if let Some((ref conv, ref bn)) = self.downsample {
            let ds = conv.forward(input)?;
            bn.forward(&ds)?
        } else {
            input.clone()
        };

        // Add skip connection
        let x = &x + &identity;

        // Final ReLU
        let x = x.mapv(|v: F| v.max(F::zero()));

        Ok(x)
    }

    fn backward(
        &self,
        _input: &Array<F, IxDyn>,
        grad_output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>> {
        Ok(grad_output.clone())
    }

    fn update(&mut self, learning_rate: F) -> Result<()> {
        self.conv1.update(learning_rate)?;
        self.bn1.update(learning_rate)?;
        self.conv2.update(learning_rate)?;
        self.bn2.update(learning_rate)?;
        if let Some((ref mut conv, ref mut bn)) = self.downsample {
            conv.update(learning_rate)?;
            bn.update(learning_rate)?;
        }
        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}

/// Bottleneck block for ResNet (3 conv layers)
struct BottleneckBlock<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> {
    /// First convolutional layer (1x1 reduce)
    conv1: Conv2D<F>,
    /// First batch normalization layer
    bn1: BatchNorm<F>,
    /// Second convolutional layer (3x3)
    conv2: Conv2D<F>,
    /// Second batch normalization layer
    bn2: BatchNorm<F>,
    /// Third convolutional layer (1x1 expand)
    conv3: Conv2D<F>,
    /// Third batch normalization layer
    bn3: BatchNorm<F>,
    /// Skip connection downsample (optional)
    downsample: Option<(Conv2D<F>, BatchNorm<F>)>,
    /// Expansion factor
    #[allow(dead_code)]
    expansion: usize,
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Clone
    for BottleneckBlock<F>
{
    fn clone(&self) -> Self {
        Self {
            conv1: self.conv1.clone(),
            bn1: self.bn1.clone(),
            conv2: self.conv2.clone(),
            bn2: self.bn2.clone(),
            conv3: self.conv3.clone(),
            bn3: self.bn3.clone(),
            downsample: self.downsample.clone(),
            expansion: self.expansion,
        }
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> BottleneckBlock<F> {
    /// Expansion factor for bottleneck blocks
    const EXPANSION: usize = 4;

    /// Create a new bottleneck block
    pub fn new(
        in_channels: usize,
        out_channels: usize,
        stride: usize,
        downsample: bool,
    ) -> Result<Self> {
        let bottleneck_channels = out_channels / Self::EXPANSION;
        let stride_tuple = (stride, stride);

        // First conv (1x1 reduce)
        let mut rng1 = scirs2_core::random::rngs::SmallRng::from_seed([48; 32]);
        let conv1 = Conv2D::new(in_channels, bottleneck_channels, (1, 1), (1, 1), None)?
            .with_padding(PaddingMode::Valid);

        let mut rng2 = scirs2_core::random::rngs::SmallRng::from_seed([49; 32]);
        let bn1 = BatchNorm::new(bottleneck_channels, 1e-5, 0.1, &mut rng2)?;

        // Second conv (3x3)
        let mut rng3 = scirs2_core::random::rngs::SmallRng::from_seed([50; 32]);
        let conv2 = Conv2D::new(
            bottleneck_channels,
            bottleneck_channels,
            (3, 3),
            stride_tuple,
            None,
        )?
        .with_padding(PaddingMode::Same);

        let mut rng4 = scirs2_core::random::rngs::SmallRng::from_seed([51; 32]);
        let bn2 = BatchNorm::new(bottleneck_channels, 1e-5, 0.1, &mut rng4)?;

        // Third conv (1x1 expand)
        let mut rng5 = scirs2_core::random::rngs::SmallRng::from_seed([52; 32]);
        let conv3 = Conv2D::new(bottleneck_channels, out_channels, (1, 1), (1, 1), None)?
            .with_padding(PaddingMode::Valid);

        let mut rng6 = scirs2_core::random::rngs::SmallRng::from_seed([53; 32]);
        let bn3 = BatchNorm::new(out_channels, 1e-5, 0.1, &mut rng6)?;

        // Downsample
        let downsample = if downsample {
            let mut rng7 = scirs2_core::random::rngs::SmallRng::from_seed([54; 32]);
            let ds_conv = Conv2D::new(in_channels, out_channels, (1, 1), stride_tuple, None)?
                .with_padding(PaddingMode::Valid);

            let mut rng8 = scirs2_core::random::rngs::SmallRng::from_seed([55; 32]);
            let ds_bn = BatchNorm::new(out_channels, 1e-5, 0.1, &mut rng8)?;
            Some((ds_conv, ds_bn))
        } else {
            None
        };

        Ok(Self {
            conv1,
            bn1,
            conv2,
            bn2,
            conv3,
            bn3,
            downsample,
            expansion: Self::EXPANSION,
        })
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Layer<F>
    for BottleneckBlock<F>
{
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        // First conv block
        let mut x = self.conv1.forward(input)?;
        x = self.bn1.forward(&x)?;
        x = x.mapv(|v: F| v.max(F::zero())); // ReLU

        // Second conv block
        x = self.conv2.forward(&x)?;
        x = self.bn2.forward(&x)?;
        x = x.mapv(|v: F| v.max(F::zero())); // ReLU

        // Third conv block
        x = self.conv3.forward(&x)?;
        x = self.bn3.forward(&x)?;

        // Skip connection
        let identity = if let Some((ref conv, ref bn)) = self.downsample {
            let ds = conv.forward(input)?;
            bn.forward(&ds)?
        } else {
            input.clone()
        };

        // Add skip connection
        let x = &x + &identity;

        // Final ReLU
        let x = x.mapv(|v: F| v.max(F::zero()));

        Ok(x)
    }

    fn backward(
        &self,
        _input: &Array<F, IxDyn>,
        grad_output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>> {
        Ok(grad_output.clone())
    }

    fn update(&mut self, learning_rate: F) -> Result<()> {
        self.conv1.update(learning_rate)?;
        self.bn1.update(learning_rate)?;
        self.conv2.update(learning_rate)?;
        self.bn2.update(learning_rate)?;
        self.conv3.update(learning_rate)?;
        self.bn3.update(learning_rate)?;
        if let Some((ref mut conv, ref mut bn)) = self.downsample {
            conv.update(learning_rate)?;
            bn.update(learning_rate)?;
        }
        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}

/// ResNet implementation
pub struct ResNet<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> {
    /// Initial convolutional layer
    conv1: Conv2D<F>,
    /// Initial batch normalization
    bn1: BatchNorm<F>,
    /// ResNet layer groups
    layer1: Vec<BasicBlock<F>>,
    /// ResNet layer groups (bottleneck)
    layer1_bottleneck: Vec<BottleneckBlock<F>>,
    /// Fully connected layer
    fc: Dense<F>,
    /// Dropout layer
    dropout: Option<Dropout<F>>,
    /// Model configuration
    config: ResNetConfig,
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> ResNet<F> {
    /// Create a new ResNet model
    pub fn new(config: ResNetConfig) -> Result<Self> {
        // Initial convolution
        let mut rng1 = scirs2_core::random::rngs::SmallRng::from_seed([56; 32]);
        let conv1 = Conv2D::new(config.input_channels, 64, (7, 7), (2, 2), None)?
            .with_padding(PaddingMode::Same);

        let mut rng2 = scirs2_core::random::rngs::SmallRng::from_seed([57; 32]);
        let bn1 = BatchNorm::new(64, 1e-5, 0.1, &mut rng2)?;

        // For simplicity, create a single layer with blocks
        let layer1 = Vec::new();
        let layer1_bottleneck = Vec::new();

        // Final FC layer
        let fc_in_features = match config.block {
            ResNetBlock::Basic => config.layers.last().map(|l| l.channels).unwrap_or(512),
            ResNetBlock::Bottleneck => config.layers.last().map(|l| l.channels * 4).unwrap_or(2048),
        };

        let mut rng3 = scirs2_core::random::rngs::SmallRng::from_seed([58; 32]);
        let fc = Dense::new(fc_in_features, config.num_classes, None, &mut rng3)?;

        // Dropout
        let dropout = if config.dropout_rate > 0.0 {
            let mut rng4 = scirs2_core::random::rngs::SmallRng::from_seed([59; 32]);
            Some(Dropout::new(config.dropout_rate, &mut rng4)?)
        } else {
            None
        };

        Ok(Self {
            conv1,
            bn1,
            layer1,
            layer1_bottleneck,
            fc,
            dropout,
            config,
        })
    }

    /// Create a ResNet-18 model
    pub fn resnet18(input_channels: usize, num_classes: usize) -> Result<Self> {
        let config = ResNetConfig::resnet18(input_channels, num_classes);
        Self::new(config)
    }

    /// Create a ResNet-34 model
    pub fn resnet34(input_channels: usize, num_classes: usize) -> Result<Self> {
        let config = ResNetConfig::resnet34(input_channels, num_classes);
        Self::new(config)
    }

    /// Create a ResNet-50 model
    pub fn resnet50(input_channels: usize, num_classes: usize) -> Result<Self> {
        let config = ResNetConfig::resnet50(input_channels, num_classes);
        Self::new(config)
    }

    /// Create a ResNet-101 model
    pub fn resnet101(input_channels: usize, num_classes: usize) -> Result<Self> {
        let config = ResNetConfig::resnet101(input_channels, num_classes);
        Self::new(config)
    }

    /// Create a ResNet-152 model
    pub fn resnet152(input_channels: usize, num_classes: usize) -> Result<Self> {
        let config = ResNetConfig::resnet152(input_channels, num_classes);
        Self::new(config)
    }

    /// Get the model configuration
    pub fn config(&self) -> &ResNetConfig {
        &self.config
    }

    /// Global average pooling
    fn global_avg_pool(x: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        let shape = x.shape();
        if shape.len() != 4 {
            return Err(NeuralError::InferenceError(format!(
                "Expected 4D input for average pooling, got shape {:?}",
                shape
            )));
        }

        let batch_size = shape[0];
        let channels = shape[1];
        let height = shape[2];
        let width = shape[3];

        let mut output = Array::zeros(IxDyn(&[batch_size, channels]));
        let count = F::from(height * width).expect("Failed to convert to float");

        for b in 0..batch_size {
            for c in 0..channels {
                let mut sum = F::zero();
                for h in 0..height {
                    for w in 0..width {
                        sum += x[[b, c, h, w]];
                    }
                }
                output[[b, c]] = sum / count;
            }
        }

        Ok(output)
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Layer<F> for ResNet<F> {
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        // Initial conv
        let mut x = self.conv1.forward(input)?;
        x = self.bn1.forward(&x)?;
        x = x.mapv(|v: F| v.max(F::zero())); // ReLU

        // Process through basic blocks
        for block in &self.layer1 {
            x = block.forward(&x)?;
        }

        // Process through bottleneck blocks
        for block in &self.layer1_bottleneck {
            x = block.forward(&x)?;
        }

        // Global average pooling
        x = Self::global_avg_pool(&x)?;

        // Dropout
        if let Some(ref dropout) = self.dropout {
            x = dropout.forward(&x)?;
        }

        // Final FC
        x = self.fc.forward(&x)?;

        Ok(x)
    }

    fn backward(
        &self,
        _input: &Array<F, IxDyn>,
        grad_output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>> {
        Ok(grad_output.clone())
    }

    fn update(&mut self, learning_rate: F) -> Result<()> {
        self.conv1.update(learning_rate)?;
        self.bn1.update(learning_rate)?;

        for block in &mut self.layer1 {
            block.update(learning_rate)?;
        }

        for block in &mut self.layer1_bottleneck {
            block.update(learning_rate)?;
        }

        self.fc.update(learning_rate)?;

        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_resnet_config_18() {
        let config = ResNetConfig::resnet18(3, 1000);
        assert_eq!(config.input_channels, 3);
        assert_eq!(config.num_classes, 1000);
        assert_eq!(config.layers.len(), 4);
        assert!(matches!(config.block, ResNetBlock::Basic));
    }

    #[test]
    fn test_resnet_config_50() {
        let config = ResNetConfig::resnet50(3, 1000);
        assert!(matches!(config.block, ResNetBlock::Bottleneck));
        assert_eq!(config.layers.len(), 4);
    }

    #[test]
    fn test_resnet_config_with_dropout() {
        let config = ResNetConfig::resnet18(3, 100).with_dropout(0.5);
        assert_eq!(config.dropout_rate, 0.5);
    }

    #[test]
    fn test_resnet_config_variants() {
        let config34 = ResNetConfig::resnet34(3, 1000);
        assert_eq!(config34.layers[0].blocks, 3);
        assert_eq!(config34.layers[1].blocks, 4);

        let config101 = ResNetConfig::resnet101(3, 1000);
        assert_eq!(config101.layers[2].blocks, 23);

        let config152 = ResNetConfig::resnet152(3, 1000);
        assert_eq!(config152.layers[2].blocks, 36);
    }
}