oxirs-chat 0.2.4

//! Federated Learning Module
//!
//! Provides distributed model training capabilities across multiple nodes
//! while preserving data privacy and enabling collaborative learning.

use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::{
    collections::HashMap,
    net::SocketAddr,
    time::{Duration, SystemTime},
};
use tokio::sync::{RwLock, Semaphore};

use super::fine_tuning::{TrainingMetrics, TrainingParameters};

/// Federated learning coordinator configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederatedLearningConfig {
    pub federation_id: String,
    pub coordinator_address: SocketAddr,
    pub aggregation_strategy: AggregationStrategy,
    pub privacy_config: PrivacyConfig,
    pub federation_rounds: usize,
    pub min_participants: usize,
    pub max_participants: usize,
    pub round_timeout: Duration,
    pub model_config: FederatedModelConfig,
}

/// Strategies for aggregating model updates
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AggregationStrategy {
    FederatedAveraging,
    FederatedProx,
    FederatedOpt,
    SecureAggregation,
    AdaptiveAggregation,
    WeightedAveraging,
}

/// Privacy preservation configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrivacyConfig {
    pub differential_privacy: Option<DifferentialPrivacyConfig>,
    pub secure_aggregation: bool,
    pub homomorphic_encryption: bool,
    pub trusted_execution_environment: bool,
    pub gradient_clipping: Option<GradientClippingConfig>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DifferentialPrivacyConfig {
    pub epsilon: f32,
    pub delta: f32,
    pub noise_multiplier: f32,
    pub max_grad_norm: f32,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GradientClippingConfig {
    pub max_norm: f32,
    pub norm_type: NormType,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum NormType {
    L1,
    L2,
    Infinity,
}

/// Federated model configuration
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederatedModelConfig {
    pub base_model: String,
    pub model_version: String,
    pub training_parameters: TrainingParameters,
    pub communication_frequency: CommunicationFrequency,
    pub model_compression: ModelCompressionConfig,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CommunicationFrequency {
    EveryEpoch,
    EveryNSteps(usize),
    Adaptive,
    OnThreshold(f32),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelCompressionConfig {
    pub enabled: bool,
    pub compression_ratio: f32,
    pub quantization_bits: Option<u8>,
    pub sparsification_threshold: Option<f32>,
}

/// Federated learning participant node
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederatedNode {
    pub node_id: String,
    pub address: SocketAddr,
    pub capabilities: NodeCapabilities,
    pub data_statistics: DataStatistics,
    pub privacy_budget: PrivacyBudget,
    pub reputation_score: f32,
    pub last_seen: SystemTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeCapabilities {
    pub compute_power: ComputePower,
    pub memory_gb: f32,
    pub network_bandwidth_mbps: f32,
    pub storage_gb: f32,
    pub supported_frameworks: Vec<String>,
    pub privacy_features: Vec<PrivacyFeature>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ComputePower {
    CPU(usize),       // Number of cores
    GPU(String, f32), // GPU model and memory
    TPU(String),
    Hybrid(Vec<ComputePower>),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum PrivacyFeature {
    DifferentialPrivacy,
    SecureAggregation,
    HomomorphicEncryption,
    TrustedExecutionEnvironment,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DataStatistics {
    pub total_samples: usize,
    pub data_distribution: HashMap<String, f32>,
    pub quality_score: f32,
    pub last_updated: SystemTime,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrivacyBudget {
    pub epsilon_consumed: f32,
    pub epsilon_remaining: f32,
    pub total_epsilon: f32,
    pub reset_period: Duration,
}

/// Federated learning round information
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederationRound {
    pub round_number: usize,
    pub participants: Vec<String>,
    pub global_model_version: String,
    pub aggregation_result: AggregationResult,
    pub round_metrics: RoundMetrics,
    pub started_at: SystemTime,
    pub completed_at: Option<SystemTime>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AggregationResult {
    pub aggregated_weights: Vec<u8>, // Serialized model weights
    pub aggregation_quality: f32,
    pub convergence_indicator: f32,
    pub participation_rate: f32,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RoundMetrics {
    pub average_loss: f32,
    pub accuracy_improvement: f32,
    pub communication_cost: f32,
    pub privacy_cost: f32,
    pub round_duration: Duration,
    pub node_contributions: HashMap<String, NodeContribution>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeContribution {
    pub data_contribution: usize,
    pub compute_contribution: f32,
    pub quality_score: f32,
    pub communication_overhead: f32,
}

/// Federated learning coordinator
pub struct FederatedCoordinator {
    config: FederatedLearningConfig,
    nodes: RwLock<HashMap<String, FederatedNode>>,
    rounds: RwLock<Vec<FederationRound>>,
    current_model: RwLock<Option<Vec<u8>>>,
    round_semaphore: Semaphore,
}

impl FederatedCoordinator {
    /// Create new federated learning coordinator
    pub fn new(config: FederatedLearningConfig) -> Self {
        Self {
            config,
            nodes: RwLock::new(HashMap::new()),
            rounds: RwLock::new(Vec::new()),
            current_model: RwLock::new(None),
            round_semaphore: Semaphore::new(1),
        }
    }

    /// Register a new federated learning node
    pub async fn register_node(&self, node: FederatedNode) -> Result<()> {
        let mut nodes = self.nodes.write().await;

        // Validate node capabilities
        self.validate_node_capabilities(&node)?;

        nodes.insert(node.node_id.clone(), node);
        Ok(())
    }

    /// Start a new federation round
    pub async fn start_federation_round(&self) -> Result<usize> {
        let _permit = self
            .round_semaphore
            .acquire()
            .await
            .expect("semaphore should not be closed");

        let round_number = {
            let rounds = self.rounds.read().await;
            rounds.len()
        };

        // Select participants for this round
        let participants = self.select_participants().await?;

        if participants.len() < self.config.min_participants {
            return Err(anyhow!("Insufficient participants for federation round"));
        }

        // Create new federation round
        let federation_round = FederationRound {
            round_number,
            participants: participants.clone(),
            global_model_version: format!("v_{round_number}"),
            aggregation_result: AggregationResult {
                aggregated_weights: Vec::new(),
                aggregation_quality: 0.0,
                convergence_indicator: 0.0,
                participation_rate: participants.len() as f32 / self.config.max_participants as f32,
            },
            round_metrics: RoundMetrics {
                average_loss: 0.0,
                accuracy_improvement: 0.0,
                communication_cost: 0.0,
                privacy_cost: 0.0,
                round_duration: Duration::from_secs(0),
                node_contributions: HashMap::new(),
            },
            started_at: SystemTime::now(),
            completed_at: None,
        };

        {
            let mut rounds = self.rounds.write().await;
            rounds.push(federation_round);
        }

        // Execute federation round
        self.execute_federation_round(round_number, participants)
            .await?;

        Ok(round_number)
    }

    /// Execute a federation round
    async fn execute_federation_round(
        &self,
        round_number: usize,
        participants: Vec<String>,
    ) -> Result<()> {
        // Send global model to participants
        self.distribute_global_model(&participants).await?;

        // Wait for local training completion
        let local_updates = self.collect_local_updates(&participants).await?;

        // Aggregate model updates
        let aggregation_result = self.aggregate_model_updates(local_updates).await?;

        // Update global model
        self.update_global_model(aggregation_result.clone()).await?;

        // Update round information
        {
            let mut rounds = self.rounds.write().await;
            if let Some(round) = rounds.get_mut(round_number) {
                round.aggregation_result = aggregation_result;
                round.completed_at = Some(SystemTime::now());
                round.round_metrics.round_duration = round
                    .completed_at
                    .expect("completed_at was just set to Some")
                    .duration_since(round.started_at)
                    .unwrap_or(Duration::from_secs(0));
            }
        }

        Ok(())
    }

    /// Select participants for federation round
    async fn select_participants(&self) -> Result<Vec<String>> {
        let nodes = self.nodes.read().await;

        // Simple selection strategy: choose top nodes by reputation
        let mut eligible_nodes: Vec<_> = nodes
            .values()
            .filter(|node| self.is_node_eligible(node))
            .collect();

        eligible_nodes.sort_by(|a, b| {
            b.reputation_score
                .partial_cmp(&a.reputation_score)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        let selected = eligible_nodes
            .into_iter()
            .take(self.config.max_participants)
            .map(|node| node.node_id.clone())
            .collect();

        Ok(selected)
    }

    /// Check if node is eligible for participation
    fn is_node_eligible(&self, node: &FederatedNode) -> bool {
        // Check privacy budget
        if node.privacy_budget.epsilon_remaining <= 0.0 {
            return false;
        }

        // Check node capabilities
        if node.capabilities.memory_gb < 4.0 {
            return false;
        }

        // Check reputation score
        if node.reputation_score < 0.5 {
            return false;
        }

        // Check last seen time
        let now = SystemTime::now();
        if let Ok(duration) = now.duration_since(node.last_seen) {
            if duration > Duration::from_secs(3600) {
                // 1 hour timeout
                return false;
            }
        }

        true
    }

    /// Distribute global model to participants
    async fn distribute_global_model(&self, participants: &[String]) -> Result<()> {
        let current_model = self.current_model.read().await;

        if let Some(_model_weights) = current_model.as_ref() {
            // Simulate model distribution
            for participant_id in participants {
                // In real implementation, this would send model via network
                tokio::time::sleep(Duration::from_millis(100)).await;
                println!("Sent model to participant: {participant_id}");
            }
        }

        Ok(())
    }

    /// Collect local updates from participants
    async fn collect_local_updates(&self, participants: &[String]) -> Result<Vec<LocalUpdate>> {
        let mut updates = Vec::new();

        for participant_id in participants {
            // Simulate waiting for local training
            tokio::time::sleep(Duration::from_millis(500)).await;

            let update = LocalUpdate {
                node_id: participant_id.clone(),
                model_weights: vec![0u8; 1000], // Mock weights
                training_metrics: TrainingMetrics::default(),
                data_contribution: 1000,
                privacy_spent: 0.1,
            };

            updates.push(update);
        }

        Ok(updates)
    }

    /// Aggregate model updates from participants
    async fn aggregate_model_updates(
        &self,
        updates: Vec<LocalUpdate>,
    ) -> Result<AggregationResult> {
        match self.config.aggregation_strategy {
            AggregationStrategy::FederatedAveraging => self.federated_averaging(updates).await,
            AggregationStrategy::WeightedAveraging => self.weighted_averaging(updates).await,
            _ => {
                // Fallback to simple averaging
                self.federated_averaging(updates).await
            }
        }
    }

    /// Federated averaging aggregation
    async fn federated_averaging(&self, updates: Vec<LocalUpdate>) -> Result<AggregationResult> {
        if updates.is_empty() {
            return Err(anyhow!("No updates to aggregate"));
        }

        // Simulate aggregation process
        tokio::time::sleep(Duration::from_millis(200)).await;

        let _total_samples: usize = updates.iter().map(|u| u.data_contribution).sum();
        let aggregated_weights = vec![0u8; 1000]; // Mock aggregated weights

        Ok(AggregationResult {
            aggregated_weights,
            aggregation_quality: 0.9,
            convergence_indicator: 0.8,
            participation_rate: updates.len() as f32 / self.config.max_participants as f32,
        })
    }

    /// Weighted averaging aggregation
    async fn weighted_averaging(&self, updates: Vec<LocalUpdate>) -> Result<AggregationResult> {
        if updates.is_empty() {
            return Err(anyhow!("No updates to aggregate"));
        }

        // Calculate weights based on data contribution and quality
        let _total_weight: f32 = updates
            .iter()
            .map(|u| u.data_contribution as f32 * self.get_node_quality(&u.node_id))
            .sum();

        // Simulate weighted aggregation
        tokio::time::sleep(Duration::from_millis(300)).await;

        let aggregated_weights = vec![0u8; 1000]; // Mock aggregated weights

        Ok(AggregationResult {
            aggregated_weights,
            aggregation_quality: 0.92,
            convergence_indicator: 0.85,
            participation_rate: updates.len() as f32 / self.config.max_participants as f32,
        })
    }

    /// Get node quality score
    fn get_node_quality(&self, _node_id: &str) -> f32 {
        // In real implementation, this would look up node reputation
        0.8
    }

    /// Update global model with aggregation result
    async fn update_global_model(&self, result: AggregationResult) -> Result<()> {
        let mut current_model = self.current_model.write().await;
        *current_model = Some(result.aggregated_weights);
        Ok(())
    }

    /// Validate node capabilities
    fn validate_node_capabilities(&self, node: &FederatedNode) -> Result<()> {
        if node.capabilities.memory_gb < 2.0 {
            return Err(anyhow!(
                "Insufficient memory: {} GB",
                node.capabilities.memory_gb
            ));
        }

        if node.capabilities.network_bandwidth_mbps < 10.0 {
            return Err(anyhow!(
                "Insufficient bandwidth: {} Mbps",
                node.capabilities.network_bandwidth_mbps
            ));
        }

        Ok(())
    }

    /// Get federation statistics
    pub async fn get_federation_statistics(&self) -> Result<FederationStatistics> {
        let nodes = self.nodes.read().await;
        let rounds = self.rounds.read().await;

        let total_nodes = nodes.len();
        let active_nodes = nodes
            .values()
            .filter(|node| self.is_node_eligible(node))
            .count();

        let total_rounds = rounds.len();
        let average_participation = if total_rounds > 0 {
            rounds
                .iter()
                .map(|r| r.aggregation_result.participation_rate)
                .sum::<f32>()
                / total_rounds as f32
        } else {
            0.0
        };

        Ok(FederationStatistics {
            total_nodes,
            active_nodes,
            total_rounds,
            average_participation,
            convergence_status: if total_rounds > 5 {
                "Converging".to_string()
            } else {
                "Training".to_string()
            },
            privacy_budget_utilization: 0.3, // Mock value
        })
    }
}

/// Local model update from federated node
#[derive(Debug, Clone)]
pub struct LocalUpdate {
    pub node_id: String,
    pub model_weights: Vec<u8>,
    pub training_metrics: TrainingMetrics,
    pub data_contribution: usize,
    pub privacy_spent: f32,
}

/// Federation statistics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FederationStatistics {
    pub total_nodes: usize,
    pub active_nodes: usize,
    pub total_rounds: usize,
    pub average_participation: f32,
    pub convergence_status: String,
    pub privacy_budget_utilization: f32,
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr};

    #[tokio::test]
    async fn test_federated_coordinator_creation() {
        let config = FederatedLearningConfig {
            federation_id: "test_federation".to_string(),
            coordinator_address: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080),
            aggregation_strategy: AggregationStrategy::FederatedAveraging,
            privacy_config: PrivacyConfig {
                differential_privacy: None,
                secure_aggregation: false,
                homomorphic_encryption: false,
                trusted_execution_environment: false,
                gradient_clipping: None,
            },
            federation_rounds: 100,
            min_participants: 2,
            max_participants: 10,
            round_timeout: Duration::from_secs(3600),
            model_config: FederatedModelConfig {
                base_model: "test_model".to_string(),
                model_version: "v1.0".to_string(),
                training_parameters: TrainingParameters::default(),
                communication_frequency: CommunicationFrequency::EveryEpoch,
                model_compression: ModelCompressionConfig {
                    enabled: false,
                    compression_ratio: 0.5,
                    quantization_bits: None,
                    sparsification_threshold: None,
                },
            },
        };

        let coordinator = FederatedCoordinator::new(config);
        let stats = coordinator
            .get_federation_statistics()
            .await
            .expect("should succeed");
        assert_eq!(stats.total_nodes, 0);
    }

    #[tokio::test]
    async fn test_node_registration() {
        let config = FederatedLearningConfig {
            federation_id: "test_federation".to_string(),
            coordinator_address: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080),
            aggregation_strategy: AggregationStrategy::FederatedAveraging,
            privacy_config: PrivacyConfig {
                differential_privacy: None,
                secure_aggregation: false,
                homomorphic_encryption: false,
                trusted_execution_environment: false,
                gradient_clipping: None,
            },
            federation_rounds: 100,
            min_participants: 2,
            max_participants: 10,
            round_timeout: Duration::from_secs(3600),
            model_config: FederatedModelConfig {
                base_model: "test_model".to_string(),
                model_version: "v1.0".to_string(),
                training_parameters: TrainingParameters::default(),
                communication_frequency: CommunicationFrequency::EveryEpoch,
                model_compression: ModelCompressionConfig {
                    enabled: false,
                    compression_ratio: 0.5,
                    quantization_bits: None,
                    sparsification_threshold: None,
                },
            },
        };

        let coordinator = FederatedCoordinator::new(config);

        let node = FederatedNode {
            node_id: "node_1".to_string(),
            address: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8081),
            capabilities: NodeCapabilities {
                compute_power: ComputePower::CPU(8),
                memory_gb: 16.0,
                network_bandwidth_mbps: 100.0,
                storage_gb: 1000.0,
                supported_frameworks: vec!["pytorch".to_string()],
                privacy_features: vec![PrivacyFeature::DifferentialPrivacy],
            },
            data_statistics: DataStatistics {
                total_samples: 10000,
                data_distribution: HashMap::new(),
                quality_score: 0.9,
                last_updated: SystemTime::now(),
            },
            privacy_budget: PrivacyBudget {
                epsilon_consumed: 0.0,
                epsilon_remaining: 1.0,
                total_epsilon: 1.0,
                reset_period: Duration::from_secs(86400),
            },
            reputation_score: 0.8,
            last_seen: SystemTime::now(),
        };

        coordinator
            .register_node(node)
            .await
            .expect("should succeed");
        let stats = coordinator
            .get_federation_statistics()
            .await
            .expect("should succeed");
        assert_eq!(stats.total_nodes, 1);
    }
}