//! Core traits for sparse neural-network components: optimizers, layers,
//! activations, pooling, normalization, format conversion, initialization,
//! pruning, and model-level sparsity analysis.

use crate::{CsrTensor, SparseTensor, TorshResult};
use std::collections::HashMap;
use torsh_tensor::Tensor;

/// An optimizer that updates sparse (CSR) parameters from sparse gradients.
pub trait SparseOptimizer {
    /// Applies one update step to `parameters` using the matching `gradients`.
    fn step(
        &mut self,
        parameters: &mut [&mut CsrTensor],
        gradients: &[&CsrTensor],
    ) -> TorshResult<()>;
    /// Clears accumulated gradient state; a no-op by default.
    fn zero_grad(&mut self) {}
    /// Returns the current learning rate.
    fn lr(&self) -> f32;
    /// Sets the learning rate.
    fn set_lr(&mut self, lr: f32);
    /// Serializes optimizer state (e.g. momentum buffers); empty by default.
    fn state_dict(&self) -> HashMap<String, Tensor> {
        HashMap::new()
    }
    /// Restores state produced by `state_dict`; a no-op by default.
    fn load_state_dict(&mut self, _state: HashMap<String, Tensor>) -> TorshResult<()> {
        Ok(())
    }
    /// Short identifier for the optimizer, e.g. `"sgd"`.
    fn name(&self) -> &'static str;
    /// Current hyperparameter values keyed by name.
    fn hyperparameters(&self) -> HashMap<String, f32>;
}
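
// A minimal usage sketch built only on the accessors above: exponential
// learning-rate decay through `lr`/`set_lr`. The decay policy (multiplicative
// factor, applied e.g. once per epoch) is an illustrative assumption, not
// part of the trait contract.
pub fn decay_lr(optimizer: &mut dyn SparseOptimizer, factor: f32) {
    debug_assert!(factor > 0.0 && factor <= 1.0, "factor should shrink the lr");
    let current = optimizer.lr();
    optimizer.set_lr(current * factor);
}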

/// A neural-network layer that consumes and produces sparse tensors.
pub trait SparseLayer {
    /// Runs the layer's forward pass.
    fn forward(&self, input: &dyn SparseTensor) -> TorshResult<Box<dyn SparseTensor>>;
    /// Immutable views of the layer's learnable sparse parameters.
    fn parameters(&self) -> Vec<&CsrTensor>;
    /// Mutable views of the layer's learnable sparse parameters.
    fn parameters_mut(&mut self) -> Vec<&mut CsrTensor>;
    /// Short identifier for the layer kind.
    fn layer_type(&self) -> &'static str;
    /// Returns the layer's input and output shapes as `(input_dims, output_dims)`.
    fn dimensions(&self) -> (Vec<usize>, Vec<usize>);
    /// Sparsity statistics for the layer's parameters.
    fn sparsity_stats(&self) -> super::types::SparseStats;
    /// Switches the layer between training and evaluation mode.
    fn train(&mut self, training: bool);
    /// Whether the layer is currently in training mode.
    fn training(&self) -> bool;
}
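
// Hypothetical helper (not part of the trait): chain `SparseLayer`s
// sequentially, feeding each layer's output to the next. Relies only on the
// `forward` method declared above.
pub fn forward_sequential(
    layers: &[&dyn SparseLayer],
    input: Box<dyn SparseTensor>,
) -> TorshResult<Box<dyn SparseTensor>> {
    let mut current = input;
    for layer in layers {
        current = layer.forward(&*current)?;
    }
    Ok(current)
}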

/// An element-wise activation function over sparse tensors.
pub trait SparseActivation {
    /// Applies the activation, returning a new sparse tensor.
    fn forward(&self, input: &dyn SparseTensor) -> TorshResult<Box<dyn SparseTensor>>;
    /// Applies the activation in place. Note: the default implementation is a
    /// fallback that runs `forward` and discards the result, so it does not
    /// actually mutate `input`; implementors should override it with a true
    /// in-place update.
    fn forward_inplace(&self, input: &mut dyn SparseTensor) -> TorshResult<()> {
        let _result = self.forward(input)?;
        Ok(())
    }
    /// Short identifier for the activation, e.g. `"relu"`.
    fn name(&self) -> &'static str;
    /// Whether the activation keeps the sparsity pattern unchanged
    /// (zero inputs stay zero at the same positions).
    fn preserves_sparsity(&self) -> bool;
    /// Whether the activation can zero out previously nonzero entries
    /// (e.g. ReLU on negative values), increasing sparsity.
    fn can_increase_sparsity(&self) -> bool;
}
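
// Illustrative dispatch helper: take the in-place path only when the
// activation preserves the sparsity pattern, otherwise allocate a new tensor
// via `forward`. The dispatch policy is an assumption of this sketch.
pub fn apply_activation(
    activation: &dyn SparseActivation,
    input: &mut dyn SparseTensor,
) -> TorshResult<Option<Box<dyn SparseTensor>>> {
    if activation.preserves_sparsity() {
        // Pattern is unchanged, so mutating values in place is safe.
        activation.forward_inplace(input)?;
        Ok(None)
    } else {
        // The pattern may change; build a new tensor instead.
        activation.forward(input).map(Some)
    }
}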

/// A 2D pooling operation over sparse tensors.
pub trait SparsePooling {
    /// Runs the pooling operation.
    fn forward(&self, input: &dyn SparseTensor) -> TorshResult<Box<dyn SparseTensor>>;
    /// Short identifier for the pooling kind, e.g. `"max"` or `"avg"`.
    fn operation_type(&self) -> &'static str;
    /// Pooling window size as `(height, width)`.
    fn kernel_size(&self) -> (usize, usize);
    /// Window stride as `(height, width)`.
    fn stride(&self) -> (usize, usize);
    /// Zero padding as `(height, width)`.
    fn padding(&self) -> (usize, usize);
    /// Computes the output shape for a given input shape.
    fn output_dimensions(&self, input_dims: &[usize]) -> Vec<usize>;
}
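
// Sketch of the shape arithmetic an `output_dimensions` implementation would
// typically perform on the last two (spatial) axes:
//   out = (in + 2 * padding - kernel) / stride + 1
// The trailing-(height, width) layout is an assumption of this example, not
// something the trait mandates.
pub fn pooled_output_dims(
    input_dims: &[usize],
    kernel: (usize, usize),
    stride: (usize, usize),
    padding: (usize, usize),
) -> Vec<usize> {
    let mut out = input_dims.to_vec();
    let n = out.len();
    if n >= 2 {
        out[n - 2] = (input_dims[n - 2] + 2 * padding.0 - kernel.0) / stride.0 + 1;
        out[n - 1] = (input_dims[n - 1] + 2 * padding.1 - kernel.1) / stride.1 + 1;
    }
    out
}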

/// A normalization layer (e.g. batch norm) adapted to sparse inputs.
pub trait SparseNormalization {
    /// Normalizes the input.
    fn forward(&self, input: &dyn SparseTensor) -> TorshResult<Box<dyn SparseTensor>>;
    /// Updates running statistics (typically called during training).
    fn update_stats(&mut self, input: &dyn SparseTensor) -> TorshResult<()>;
    /// Short identifier for the normalization kind, e.g. `"batch_norm"`.
    fn norm_type(&self) -> &'static str;
    /// Immutable views of the learnable (dense) parameters, e.g. scale and shift.
    fn learnable_parameters(&self) -> Vec<&Tensor>;
    /// Mutable views of the learnable (dense) parameters.
    fn learnable_parameters_mut(&mut self) -> Vec<&mut Tensor>;
}
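
// Hypothetical convenience wrapper: refresh running statistics only while
// training, then normalize. Threading the training flag this way is a choice
// of this sketch; the trait leaves that policy to the caller.
pub fn normalize(
    norm: &mut dyn SparseNormalization,
    input: &dyn SparseTensor,
    training: bool,
) -> TorshResult<Box<dyn SparseTensor>> {
    if training {
        norm.update_stats(input)?;
    }
    norm.forward(input)
}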

/// Conversion between sparse storage formats (CSR, COO, CSC).
pub trait SparseConverter {
    /// Converts to compressed sparse row format.
    fn to_csr(&self) -> TorshResult<CsrTensor>;
    /// Converts to coordinate format.
    fn to_coo(&self) -> TorshResult<crate::CooTensor>;
    /// Converts to compressed sparse column format.
    fn to_csc(&self) -> TorshResult<crate::CscTensor>;
    /// The format the tensor is currently stored in.
    fn current_format(&self) -> super::types::SparseFormat;
    /// Whether a conversion is required to reach `target_format`.
    fn needs_conversion(&self, target_format: super::types::SparseFormat) -> bool {
        self.current_format() != target_format
    }
}
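
// Usage sketch for the conversion trait: materialize a CSR copy only when the
// tensor is not already in CSR form. Assumes `SparseFormat` has a `Csr`
// variant, which this file does not show.
pub fn csr_if_needed(tensor: &dyn SparseConverter) -> TorshResult<Option<CsrTensor>> {
    if tensor.needs_conversion(super::types::SparseFormat::Csr) {
        tensor.to_csr().map(Some)
    } else {
        // Already CSR; skip the conversion cost.
        Ok(None)
    }
}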

/// A strategy for creating sparse weight tensors.
pub trait SparseInitializer {
    /// Creates a new sparse tensor of the given shape according to `config`.
    fn initialize(
        &self,
        shape: &[usize],
        config: &super::types::SparseInitConfig,
    ) -> TorshResult<CsrTensor>;
    /// Sparsifies an existing dense tensor to the given sparsity ratio
    /// (fraction of zero entries, in `[0, 1]`).
    fn from_dense(&self, dense: &Tensor, sparsity: f32) -> TorshResult<CsrTensor>;
    /// Short identifier for the strategy.
    fn strategy_name(&self) -> &'static str;
}
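
// Brief usage sketch: derive a sparse weight from a dense tensor at a target
// sparsity, with a sanity check on the ratio. Purely illustrative.
pub fn sparsify_dense(
    init: &dyn SparseInitializer,
    dense: &Tensor,
    sparsity: f32,
) -> TorshResult<CsrTensor> {
    debug_assert!((0.0..=1.0).contains(&sparsity), "sparsity is a fraction");
    init.from_dense(dense, sparsity)
}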

/// A strategy for pruning entries from a sparse tensor.
pub trait SparsePruner {
    /// Prunes `tensor` until it reaches `target_sparsity`.
    fn prune(&self, tensor: &CsrTensor, target_sparsity: f32) -> TorshResult<CsrTensor>;
    /// Prunes using gradient information in addition to the weights.
    fn prune_with_gradients(
        &self,
        tensor: &CsrTensor,
        gradients: &CsrTensor,
        target_sparsity: f32,
    ) -> TorshResult<CsrTensor>;
    /// Short identifier for the strategy, e.g. `"magnitude"`.
    fn pruning_strategy(&self) -> &'static str;
    /// Whether the pruner removes structured groups (rows, columns, blocks)
    /// rather than individual entries.
    fn is_structured(&self) -> bool;
}
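
// Sketch of gradual pruning built only on the trait: ramp the sparsity target
// linearly over `steps` rounds instead of pruning in one shot. The linear
// schedule is an illustrative choice; real schedules are often polynomial or
// tied to training epochs.
pub fn prune_gradually(
    pruner: &dyn SparsePruner,
    tensor: &CsrTensor,
    final_sparsity: f32,
    steps: usize,
) -> TorshResult<CsrTensor> {
    if steps <= 1 {
        return pruner.prune(tensor, final_sparsity);
    }
    // Prune once, then re-prune the previous result at increasing targets
    // (avoids assuming `CsrTensor: Clone`).
    let mut current = pruner.prune(tensor, final_sparsity / steps as f32)?;
    for step in 2..=steps {
        current = pruner.prune(&current, final_sparsity * step as f32 / steps as f32)?;
    }
    Ok(current)
}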

/// Analyzes sparsity across an entire model.
pub trait SparseAnalyzer {
    /// Aggregates per-layer sparsity statistics into a model-level summary.
    fn analyze_model_sparsity(&self, layers: &[&dyn SparseLayer]) -> ModelSparsityAnalysis;
    /// Suggests optimizations based on an analysis.
    fn recommend_optimizations(&self, analysis: &ModelSparsityAnalysis) -> Vec<String>;
    /// Estimates resource savings implied by an analysis.
    fn estimate_savings(&self, analysis: &ModelSparsityAnalysis) -> SavingsEstimate;
}

/// Model-level sparsity summary produced by a `SparseAnalyzer`.
#[derive(Debug, Clone)]
pub struct ModelSparsityAnalysis {
    /// Fraction of all parameters that are zero, in `[0, 1]`.
    pub overall_sparsity: f32,
    /// Per-layer sparsity ratios, in layer order.
    pub layer_sparsities: Vec<f32>,
    /// Total parameter count across all layers.
    pub total_parameters: usize,
    /// Number of parameters kept in sparse storage.
    pub sparse_parameters: usize,
    /// Estimated fractional memory reduction from sparsity.
    pub memory_reduction: f32,
    /// Estimated fractional FLOP reduction from sparsity.
    pub flops_reduction: f32,
}
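
// Illustrative baseline for `recommend_optimizations`: derive simple textual
// hints from the fields above. The thresholds are arbitrary example values,
// not tuned recommendations.
pub fn basic_recommendations(analysis: &ModelSparsityAnalysis) -> Vec<String> {
    let mut recs = Vec::new();
    if analysis.overall_sparsity < 0.5 {
        recs.push("Overall sparsity is below 50%; consider magnitude pruning.".to_string());
    }
    if analysis.memory_reduction < analysis.flops_reduction {
        recs.push(
            "Memory savings lag compute savings; a more compact storage format may help."
                .to_string(),
        );
    }
    recs
}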

/// Estimated resource savings from model sparsity, each as a fraction in `[0, 1]`.
#[derive(Debug, Clone)]
pub struct SavingsEstimate {
    pub memory_savings: f32,
    pub compute_savings: f32,
    pub energy_savings: f32,
    pub storage_savings: f32,
}

impl SavingsEstimate {
    /// Unweighted mean of the four savings components.
    pub fn efficiency_score(&self) -> f32 {
        (self.memory_savings + self.compute_savings + self.energy_savings + self.storage_savings)
            / 4.0
    }
}

/// Ready-to-use default implementations of the traits above.
pub mod defaults {
    use super::*;

    /// Random sparse initialization backed by `SparseWeightGenerator`.
    pub struct DefaultSparseInitializer;

    impl SparseInitializer for DefaultSparseInitializer {
        fn initialize(
            &self,
            shape: &[usize],
            config: &super::super::types::SparseInitConfig,
        ) -> TorshResult<CsrTensor> {
            // CSR generation is only defined for matrices.
            if shape.len() != 2 {
                return Err(crate::TorshError::InvalidArgument(
                    "Only 2D shapes supported".to_string(),
                ));
            }
            super::super::utils::SparseWeightGenerator::from_config(shape[0], shape[1], config)
        }

        fn from_dense(&self, dense: &Tensor, sparsity: f32) -> TorshResult<CsrTensor> {
            super::super::utils::SparseWeightGenerator::prune_by_magnitude(dense, sparsity)
        }

        fn strategy_name(&self) -> &'static str {
            "default_random"
        }
    }

    /// Unstructured pruning that removes the smallest-magnitude entries.
    pub struct MagnitudePruner;

    impl SparsePruner for MagnitudePruner {
        fn prune(&self, tensor: &CsrTensor, target_sparsity: f32) -> TorshResult<CsrTensor> {
            // Round-trips through a dense copy; fine for small tensors, but
            // costly for very large ones.
            let dense = tensor.to_dense()?;
            super::super::utils::SparseWeightGenerator::prune_by_magnitude(&dense, target_sparsity)
        }

        fn prune_with_gradients(
            &self,
            tensor: &CsrTensor,
            _gradients: &CsrTensor,
            target_sparsity: f32,
        ) -> TorshResult<CsrTensor> {
            // Magnitude pruning ignores gradients; delegate to plain pruning.
            self.prune(tensor, target_sparsity)
        }

        fn pruning_strategy(&self) -> &'static str {
            "magnitude"
        }

        fn is_structured(&self) -> bool {
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_savings_estimate() {
        let estimate = SavingsEstimate {
            memory_savings: 0.8,
            compute_savings: 0.7,
            energy_savings: 0.75,
            storage_savings: 0.85,
        };
        // Compare with a tolerance: exact float equality is fragile since
        // 0.8, 0.7, and 0.85 are not exactly representable in f32.
        assert!((estimate.efficiency_score() - 0.775).abs() < 1e-6);
    }

    #[test]
    fn test_default_initializer() {
        let initializer = defaults::DefaultSparseInitializer;
        assert_eq!(initializer.strategy_name(), "default_random");
        let config = super::super::types::SparseInitConfig::default();
        let result = initializer.initialize(&[10, 10], &config);
        assert!(result.is_ok());
    }

    #[test]
    fn test_magnitude_pruner() {
        let pruner = defaults::MagnitudePruner;
        assert_eq!(pruner.pruning_strategy(), "magnitude");
        assert!(!pruner.is_structured());
    }
}