reasonkit-core 0.1.8

The Reasoning Engine — Auditable Reasoning for Production AI | Rust-Native | Turn Prompts into Protocols
//! # Adversarial Example Generators
//!
//! Generate adversarial examples to test ML model robustness against deliberate perturbations.
//!
//! ## Supported Attack Methods
//!
//! - **FGSM**: Fast Gradient Sign Method - single-step attack
//! - **PGD**: Projected Gradient Descent - iterative attack
//! - **CW**: Carlini-Wagner - optimization-based attack
//! - **BIM**: Basic Iterative Method - iterative FGSM
//!
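//! ## Update Rules
//!
//! For reference, the standard update rules (textbook formulations, not
//! crate-specific):
//!
//! - FGSM: `x_adv = clip(x + epsilon * sign(grad_x L(x, y)))`
//! - BIM/PGD: `x_{t+1} = project(x_t + alpha * sign(grad_x L(x_t, y)))`,
//!   projecting each iterate back into the epsilon-ball around `x`
//!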
//! ## Usage
//!
//! ```rust,ignore
//! use reasonkit::ml_testing::{AdversarialGenerator, AttackMethod};
//!
//! // Create an FGSM attacker with epsilon 0.1
//! let attacker = AdversarialGenerator::new(AttackMethod::FGSM, 0.1);
//!
//! // Generate a targeted adversarial example (pass `None` for untargeted)
//! let adversarial = attacker.generate(&model, &input, Some(&target_label))?;
//!
//! // The attack succeeded if the predicted class (argmax of `prediction`)
//! // differs from the original class
//! let prediction = model.forward(&adversarial)?;
//! ```

use crate::error::Result;
use crate::ml_testing::{
    utils, GenerationConfig, GenerationResult, MLModel, TestCase, TestCaseType,
};
use ndarray::ArrayD;
use rand::Rng;

/// Adversarial attack configuration
#[derive(Debug, Clone)]
pub struct AdversarialConfig {
    /// Attack method
    pub method: AttackMethod,
    /// Maximum perturbation size (epsilon)
    pub epsilon: f32,
    /// Step size for iterative attacks
    pub step_size: f32,
    /// Number of iterations for iterative attacks
    pub num_iterations: usize,
    /// Confidence parameter for CW attack
    pub confidence: f32,
    /// Learning rate for CW attack
    pub learning_rate: f32,
    /// Maximum iterations for CW attack
    pub max_iterations: usize,
    /// Number of random restarts for PGD
    pub random_restarts: usize,
}

impl Default for AdversarialConfig {
    fn default() -> Self {
        Self {
            method: AttackMethod::FGSM,
            epsilon: 0.1,
            step_size: 0.01,
            num_iterations: 10,
            confidence: 0.0,
            learning_rate: 0.01,
            max_iterations: 1000,
            random_restarts: 0,
        }
    }
}

/// Supported adversarial attack methods
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum AttackMethod {
    /// Fast Gradient Sign Method (single step)
    FGSM,
    /// Projected Gradient Descent (iterative)
    PGD,
    /// Basic Iterative Method (iterative FGSM)
    BIM,
    /// Carlini-Wagner optimization-based attack
    CW,
}

/// Adversarial example generator
pub struct AdversarialGenerator {
    config: AdversarialConfig,
}

impl AdversarialGenerator {
    /// Create a new adversarial generator with the given method and epsilon
    /// (remaining fields use the defaults)
    pub fn new(method: AttackMethod, epsilon: f32) -> Self {
        let config = AdversarialConfig {
            method,
            epsilon,
            ..Default::default()
        };
        Self { config }
    }

    /// Create generator with custom configuration
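    ///
    /// A sketch of overriding selected fields (leaning on `Default`):
    ///
    /// ```rust,ignore
    /// let config = AdversarialConfig {
    ///     method: AttackMethod::PGD,
    ///     epsilon: 0.05,
    ///     num_iterations: 40,
    ///     ..Default::default()
    /// };
    /// let generator = AdversarialGenerator::with_config(config);
    /// ```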
    pub fn with_config(config: AdversarialConfig) -> Self {
        Self { config }
    }

    /// Convenience constructor for FGSM
    pub fn fgsm(epsilon: f32) -> Self {
        Self::new(AttackMethod::FGSM, epsilon)
    }

    /// Convenience constructor for PGD
    pub fn pgd(epsilon: f32, num_iterations: usize) -> Self {
        let config = AdversarialConfig {
            method: AttackMethod::PGD,
            epsilon,
            num_iterations,
            ..Default::default()
        };
        Self { config }
    }

    /// Generate a single adversarial example
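    ///
    /// Pass `Some(target)` (a one-hot output array) for a targeted attack, or
    /// `None` for an untargeted one. A sketch:
    ///
    /// ```rust,ignore
    /// // Untargeted: push the prediction away from the original class
    /// let adv = generator.generate(&model, &input, None)?;
    /// // Targeted: steer the prediction toward `target`
    /// let adv = generator.generate(&model, &input, Some(&target))?;
    /// ```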
    pub fn generate<M: MLModel>(
        &self,
        model: &M,
        input: &ArrayD<f32>,
        target: Option<&ArrayD<f32>>,
    ) -> Result<ArrayD<f32>> {
        match self.config.method {
            AttackMethod::FGSM => self.fgsm_attack(model, input, target),
            AttackMethod::PGD => self.pgd_attack(model, input, target),
            AttackMethod::BIM => self.bim_attack(model, input, target),
            AttackMethod::CW => self.cw_attack(model, input, target),
        }
    }

    /// Generate multiple adversarial examples
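    ///
    /// A sketch of batch generation; `config` controls the case budget, RNG
    /// seed, and how often failed attacks are kept:
    ///
    /// ```rust,ignore
    /// let generator = AdversarialGenerator::fgsm(0.1);
    /// let result = generator.generate_batch(&model, &inputs, None, &config)?;
    /// println!("attack success rate: {:.2}", result.success_rate);
    /// ```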
    pub fn generate_batch<M: MLModel>(
        &self,
        model: &M,
        inputs: &[ArrayD<f32>],
        targets: Option<&[ArrayD<f32>]>,
        config: &GenerationConfig,
    ) -> Result<GenerationResult> {
        let mut result = GenerationResult::new();
        let mut successful_attacks = 0;

        let mut rng = utils::create_rng(config.seed);

        for (i, input) in inputs.iter().enumerate() {
            if result.test_cases.len() >= config.num_cases {
                break;
            }

            let target = targets.and_then(|t| t.get(i));

            match self.generate(model, input, target) {
                Ok(adversarial) => {
                    // Check if attack was successful
                    let original_pred = model.forward(input)?;
                    let adversarial_pred = model.forward(&adversarial)?;

                    let original_class = original_pred
                        .iter()
                        .enumerate()
                        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                        .map(|(i, _)| i)
                        .unwrap_or(0);
                    let adversarial_class = adversarial_pred
                        .iter()
                        .enumerate()
                        .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                        .map(|(i, _)| i)
                        .unwrap_or(0);

                    let success = original_class != adversarial_class;

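                    // Always keep successful attacks; keep failed attempts
                    // with probability `target_success_rate` so the suite
                    // retains some negative cases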
                    if success || rng.gen_bool(config.target_success_rate) {
                        let mut metadata = std::collections::HashMap::new();
                        metadata.insert("original_class".to_string(), original_class.to_string());
                        metadata.insert(
                            "adversarial_class".to_string(),
                            adversarial_class.to_string(),
                        );
                        metadata.insert("attack_success".to_string(), success.to_string());
                        metadata.insert("method".to_string(), format!("{:?}", self.config.method));

                        let test_case = TestCase {
                            input: adversarial,
                            expected_output: target.cloned(),
                            case_type: TestCaseType::Adversarial,
                            method: format!("{:?}", self.config.method),
                            confidence: if success { 1.0 } else { 0.0 },
                            metadata,
                        };

                        result.test_cases.push(test_case);

                        if success {
                            successful_attacks += 1;
                        }
                    }
                }
                Err(e) => {
                    result.warnings.push(format!(
                        "Failed to generate adversarial example {}: {}",
                        i, e
                    ));
                }
            }
        }

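        // Success rate over the kept test cases (failed attempts retained
        // above count toward the denominator)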
        result.success_rate = if !result.test_cases.is_empty() {
            successful_attacks as f64 / result.test_cases.len() as f64
        } else {
            0.0
        };

        result
            .statistics
            .insert("total_attempts".to_string(), inputs.len() as f64);
        result.statistics.insert(
            "successful_generations".to_string(),
            result.test_cases.len() as f64,
        );
        result
            .statistics
            .insert("success_rate".to_string(), result.success_rate);

        Ok(result)
    }

    /// FGSM attack implementation
    fn fgsm_attack<M: MLModel>(
        &self,
        model: &M,
        input: &ArrayD<f32>,
        target: Option<&ArrayD<f32>>,
    ) -> Result<ArrayD<f32>> {
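        // Single ascent step: x_adv = clip(x + epsilon * sign(grad)). Assumes
        // `MLModel::gradient` returns the gradient of the attack loss with
        // respect to the input, so stepping along its sign increases the loss.
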
        // Compute gradient
        let grad = model.gradient(input, target)?;

        // Compute perturbation
        let perturbation = utils::sign(&grad);
        let perturbation = &perturbation * self.config.epsilon;

        // Apply perturbation
        let mut adversarial = input.clone();
        adversarial += &perturbation;

        // Clip to valid range (assuming [0, 1] normalized inputs)
        utils::clip(&mut adversarial, 0.0, 1.0);

        Ok(adversarial)
    }

    /// PGD attack implementation
    fn pgd_attack<M: MLModel>(
        &self,
        model: &M,
        input: &ArrayD<f32>,
        target: Option<&ArrayD<f32>>,
    ) -> Result<ArrayD<f32>> {
        let mut adversarial = input.clone();
        let mut rng = utils::create_rng(None);

        // Random restarts
        for restart in 0..=self.config.random_restarts {
            let mut current = if restart == 0 {
                input.clone()
            } else {
                // Add random initialization
                let mut init = input.clone();
                utils::add_noise(&mut init, self.config.epsilon * 0.1, &mut rng);
                utils::clip(&mut init, 0.0, 1.0);
                init
            };

            // Iterative attack
            for _ in 0..self.config.num_iterations {
                let grad = model.gradient(&current, target)?;
                let perturbation = utils::sign(&grad);
                let perturbation = &perturbation * self.config.step_size;

                current += &perturbation;

                // Project the iterate back into the epsilon-ball (L2 projection)
                let diff = &current - input;
                let norm = utils::l2_norm(&diff);
                if norm > self.config.epsilon {
                    let scale = self.config.epsilon / norm;
                    current = input + &(&diff * scale);
                }

                utils::clip(&mut current, 0.0, 1.0);
            }

            // Keep the best adversarial example
            let current_pred = model.forward(&current)?;
            let adversarial_pred = model.forward(&adversarial)?;
            let current_loss = self.compute_loss(&current_pred, target)?;
            let adversarial_loss = self.compute_loss(&adversarial_pred, target)?;

            if current_loss > adversarial_loss {
                adversarial = current;
            }
        }

        Ok(adversarial)
    }

    /// BIM attack implementation (iterative FGSM)
    fn bim_attack<M: MLModel>(
        &self,
        model: &M,
        input: &ArrayD<f32>,
        target: Option<&ArrayD<f32>>,
    ) -> Result<ArrayD<f32>> {
        let mut adversarial = input.clone();

        for _ in 0..self.config.num_iterations {
            let grad = model.gradient(&adversarial, target)?;
            let perturbation = utils::sign(&grad);
            let perturbation = &perturbation * self.config.step_size;

            adversarial += &perturbation;

            // Project back into the epsilon-ball (L2 projection, as in PGD)
            let diff = &adversarial - input;
            let norm = utils::l2_norm(&diff);
            if norm > self.config.epsilon {
                let scale = self.config.epsilon / norm;
                adversarial = input + &(&diff * scale);
            }

            utils::clip(&mut adversarial, 0.0, 1.0);
        }

        Ok(adversarial)
    }

    /// Carlini-Wagner attack implementation (simplified)
    fn cw_attack<M: MLModel>(
        &self,
        model: &M,
        input: &ArrayD<f32>,
        target: Option<&ArrayD<f32>>,
    ) -> Result<ArrayD<f32>> {
        // Simplified implementation: plain gradient ascent with an
        // adversarial check, rather than the full Carlini-Wagner constrained
        // optimization (which requires a dedicated optimization framework)

        let mut current = input.clone();
        let mut best = input.clone();
        let mut best_loss = f32::INFINITY;

        for _ in 0..self.config.max_iterations {
            // Gradient step on the current iterate
            let grad = model.gradient(&current, target)?;
            current = &current + &(&grad * self.config.learning_rate);
            utils::clip(&mut current, 0.0, 1.0);

            let pred = model.forward(&current)?;
            let loss = self.compute_loss(&pred, target)?;

            // Keep the best iterate that is actually adversarial
            if loss < best_loss && self.is_adversarial(&pred, target) {
                best_loss = loss;
                best = current.clone();
            }

            // Stop early once the target class clears the confidence margin
            if let Some(target) = target {
                let target_class = target
                    .iter()
                    .enumerate()
                    .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                    .map(|(i, _)| i)
                    .unwrap_or(0);
                if pred[target_class] > self.config.confidence {
                    break;
                }
            }
        }

        Ok(best)
    }

    /// Compute loss for adversarial attacks
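    ///
    /// Targeted: cross-entropy `L = -sum_i y_i * ln(p_i)` against the target
    /// distribution. Untargeted: the negated maximum confidence, so larger
    /// values indicate a less confident (more perturbed) prediction.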
    fn compute_loss(&self, prediction: &ArrayD<f32>, target: Option<&ArrayD<f32>>) -> Result<f32> {
        match target {
            Some(target) => {
                // Cross-entropy loss against the target distribution
                let mut loss = 0.0;
                for (pred, targ) in prediction.iter().zip(target.iter()) {
                    if *targ > 0.0 {
                        // Guard against ln(0) when a prediction is exactly zero
                        loss -= targ * pred.max(1e-12).ln();
                    }
                }
                Ok(loss)
            }
            None => {
                // Untargeted: maximize the loss (minimize confidence)
                Ok(-prediction.iter().cloned().fold(f32::NEG_INFINITY, f32::max))
            }
        }
    }

    /// Check if prediction is adversarial
    fn is_adversarial(&self, prediction: &ArrayD<f32>, target: Option<&ArrayD<f32>>) -> bool {
        match target {
            Some(target) => {
                let pred_class = prediction
                    .iter()
                    .enumerate()
                    .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                    .map(|(i, _)| i)
                    .unwrap_or(0);
                let target_class = target
                    .iter()
                    .enumerate()
                    .max_by(|a, b| a.1.partial_cmp(b.1).unwrap())
                    .map(|(i, _)| i)
                    .unwrap_or(0);
                pred_class != target_class
            }
            None => {
                // Untargeted heuristic: count the example as adversarial when
                // the maximum predicted confidence falls below 0.5
                prediction.iter().cloned().fold(f32::NEG_INFINITY, f32::max) < 0.5
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::ArrayD;

    // Mock ML model for testing
    struct MockModel {
        weights: ArrayD<f32>,
    }

    impl MockModel {
        fn new() -> Self {
            Self {
                weights: ArrayD::from_elem(vec![10, 10], 1.0),
            }
        }
    }

    impl MLModel for MockModel {
        fn forward(&self, input: &ArrayD<f32>) -> Result<ArrayD<f32>> {
            Ok(input.clone())
        }

        fn gradient(
            &self,
            input: &ArrayD<f32>,
            _target: Option<&ArrayD<f32>>,
        ) -> Result<ArrayD<f32>> {
            Ok(&self.weights - input)
        }

        fn input_shape(&self) -> Vec<usize> {
            vec![10, 10]
        }

        fn output_shape(&self) -> Vec<usize> {
            vec![10, 10]
        }
    }

    #[test]
    fn test_adversarial_config_default() {
        let config = AdversarialConfig::default();
        assert_eq!(config.epsilon, 0.1);
        assert_eq!(config.method, AttackMethod::FGSM);
    }

    #[test]
    fn test_fgsm_generator() {
        let generator = AdversarialGenerator::fgsm(0.1);
        assert_eq!(generator.config.method, AttackMethod::FGSM);
        assert_eq!(generator.config.epsilon, 0.1);
    }

    #[test]
    fn test_pgd_generator() {
        let generator = AdversarialGenerator::pgd(0.1, 20);
        assert_eq!(generator.config.method, AttackMethod::PGD);
        assert_eq!(generator.config.epsilon, 0.1);
        assert_eq!(generator.config.num_iterations, 20);
    }

    #[test]
    fn test_fgsm_attack() {
        let model = MockModel::new();
        let generator = AdversarialGenerator::fgsm(0.1);
        let input = ArrayD::from_elem(vec![10, 10], 0.5);

        let result = generator.generate(&model, &input, None);
        assert!(result.is_ok());
    }
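
    #[test]
    fn test_bim_attack_preserves_shape() {
        // Additional sketch mirroring test_fgsm_attack: BIM should run
        // end-to-end on the mock model and preserve the input shape
        let model = MockModel::new();
        let generator = AdversarialGenerator::new(AttackMethod::BIM, 0.1);
        let input = ArrayD::from_elem(vec![10, 10], 0.5);

        let result = generator.generate(&model, &input, None);
        assert!(result.is_ok());
        assert_eq!(result.unwrap().shape(), input.shape());
    }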
}