// ipfrs_network/auto_tuner.rs

//! Automatic network configuration tuning based on system resources and usage patterns.
//!
//! This module provides intelligent auto-tuning capabilities that analyze system resources,
//! network conditions, and usage patterns to automatically optimize network configuration
//! for optimal performance.
//!
//! # Features
//!
//! - **System Resource Analysis**: Detect available CPU, memory, and network bandwidth
//! - **Workload Detection**: Identify whether the node is bandwidth-limited, CPU-limited, or memory-limited
//! - **Dynamic Reconfiguration**: Adjust settings in real-time based on observed performance
//! - **Profile-based Tuning**: Support for different use case profiles (server, mobile, IoT, etc.)
//! - **Performance Monitoring**: Track key metrics to guide tuning decisions
//!
//! # Example
//!
//! ```rust,no_run
//! use ipfrs_network::{NetworkConfig, auto_tuner::AutoTuner};
//!
//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
//! // Create auto-tuner with default settings
//! let mut tuner = AutoTuner::new();
//!
//! // Analyze system and generate optimized configuration
//! let config = tuner.generate_config().await?;
//! println!("Optimized config generated with {:?} max connections", config.max_connections);
//!
//! // Continuously monitor and adjust
//! tuner.start_monitoring().await?;
//! # Ok(())
//! # }
//! ```

use crate::NetworkConfig;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::time::{Duration, Instant};
use thiserror::Error;

/// Errors that can occur during auto-tuning operations
#[derive(Debug, Error)]
pub enum AutoTunerError {
    /// System resource probing failed; the payload describes the cause.
    #[error("Failed to detect system resources: {0}")]
    ResourceDetectionFailed(String),

    /// A generated or supplied configuration failed validation.
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// An operation required resource analysis to have run first.
    #[error("Tuning not initialized")]
    NotInitialized,

    /// `start_monitoring()` was called while monitoring was already active.
    #[error("Monitoring already running")]
    MonitoringActive,
}

/// System resource information detected by the auto-tuner
///
/// Produced by [`SystemResources::detect`] and cached by the tuner; all
/// fields are plain values, so the struct is cheap to `Clone`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemResources {
    /// Total system memory in bytes
    pub total_memory: u64,

    /// Available memory in bytes
    pub available_memory: u64,

    /// Number of CPU cores
    pub cpu_cores: usize,

    /// Estimated network bandwidth in bytes per second (0 if unknown)
    pub network_bandwidth: u64,

    /// Whether the system is likely battery-powered (mobile/IoT)
    pub is_battery_powered: bool,
}

76impl SystemResources {
77    /// Detect current system resources
78    pub fn detect() -> Result<Self, AutoTunerError> {
79        // In a real implementation, this would use system APIs
80        // For now, we'll use conservative defaults
81        Ok(Self {
82            total_memory: 4 * 1024 * 1024 * 1024,     // 4 GB default
83            available_memory: 2 * 1024 * 1024 * 1024, // 2 GB available
84            cpu_cores: num_cpus::get(),
85            network_bandwidth: 0, // Unknown by default
86            is_battery_powered: false,
87        })
88    }
89
90    /// Calculate memory category based on total memory
91    pub fn memory_category(&self) -> &'static str {
92        match self.total_memory {
93            0..134_217_728 => "very_low",           // < 128 MB
94            134_217_728..536_870_912 => "low",      // 128 MB - 512 MB
95            536_870_912..2_147_483_648 => "medium", // 512 MB - 2 GB
96            2_147_483_648..8_589_934_592 => "high", // 2 GB - 8 GB
97            _ => "very_high",                       // >= 8 GB
98        }
99    }
100}
101
/// Workload characteristics detected during runtime
///
/// Averages are exponentially smoothed by `AutoTuner::update_workload`;
/// `peak_memory_usage` is a running maximum, and the three `*_bound` flags
/// are re-derived from the latest observation on each update.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkloadProfile {
    /// Average number of concurrent connections
    pub avg_connections: f64,

    /// Average DHT query rate (queries per second)
    pub avg_query_rate: f64,

    /// Average bandwidth usage in bytes per second
    pub avg_bandwidth_usage: f64,

    /// Peak memory usage in bytes
    pub peak_memory_usage: u64,

    /// Whether the workload is primarily CPU-bound
    pub cpu_bound: bool,

    /// Whether the workload is primarily bandwidth-bound
    pub bandwidth_bound: bool,

    /// Whether the workload is primarily memory-bound
    pub memory_bound: bool,
}

127impl Default for WorkloadProfile {
128    fn default() -> Self {
129        Self {
130            avg_connections: 0.0,
131            avg_query_rate: 0.0,
132            avg_bandwidth_usage: 0.0,
133            peak_memory_usage: 0,
134            cpu_bound: false,
135            bandwidth_bound: false,
136            memory_bound: false,
137        }
138    }
139}
140
/// Configuration for the auto-tuner
///
/// Preset constructors: [`Default`] (balanced), `conservative()`, and
/// `aggressive()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoTunerConfig {
    /// Whether to enable automatic adjustments
    pub enable_auto_adjust: bool,

    /// Interval for re-evaluating configuration
    pub adjustment_interval: Duration,

    /// Minimum time before applying a configuration change
    pub stabilization_period: Duration,

    /// Safety margin for resource usage (0.0 to 1.0)
    /// For example, 0.2 means use at most 80% of available resources
    pub safety_margin: f64,

    /// Enable aggressive optimizations (may reduce stability)
    pub aggressive_mode: bool,
}

161impl Default for AutoTunerConfig {
162    fn default() -> Self {
163        Self {
164            enable_auto_adjust: true,
165            adjustment_interval: Duration::from_secs(300), // 5 minutes
166            stabilization_period: Duration::from_secs(60), // 1 minute
167            safety_margin: 0.2,
168            aggressive_mode: false,
169        }
170    }
171}
172
173impl AutoTunerConfig {
174    /// Conservative tuning for production environments
175    pub fn conservative() -> Self {
176        Self {
177            enable_auto_adjust: true,
178            adjustment_interval: Duration::from_secs(600), // 10 minutes
179            stabilization_period: Duration::from_secs(120), // 2 minutes
180            safety_margin: 0.3,
181            aggressive_mode: false,
182        }
183    }
184
185    /// Aggressive tuning for development/testing
186    pub fn aggressive() -> Self {
187        Self {
188            enable_auto_adjust: true,
189            adjustment_interval: Duration::from_secs(60), // 1 minute
190            stabilization_period: Duration::from_secs(30), // 30 seconds
191            safety_margin: 0.1,
192            aggressive_mode: true,
193        }
194    }
195}
196
/// Statistics tracked by the auto-tuner
///
/// Note: contains an `Instant`, so unlike the config types this struct is
/// intentionally not serializable; `Default` gives zeroed counters.
#[derive(Debug, Clone, Default)]
pub struct AutoTunerStats {
    /// Number of configuration adjustments made
    pub adjustments_made: u64,

    /// Number of times system resources were analyzed
    pub resource_checks: u64,

    /// Number of times workload was analyzed
    pub workload_checks: u64,

    /// Timestamp of last adjustment
    pub last_adjustment: Option<Instant>,

    /// Current optimization score (0.0 to 1.0, higher is better)
    pub optimization_score: f64,
}

/// Automatic network configuration tuner
pub struct AutoTuner {
    /// Tuning behavior knobs (intervals, safety margin, aggressiveness).
    config: AutoTunerConfig,
    /// Cached result of the last `analyze_system()`; `None` until first run.
    system_resources: Option<SystemResources>,
    /// Smoothed runtime metrics fed in via `update_workload()`.
    workload_profile: WorkloadProfile,
    /// Shared counters/score — behind `Arc<RwLock>`, presumably so a future
    /// background task can share them (TODO confirm intended use).
    stats: Arc<RwLock<AutoTunerStats>>,
    /// Shared flag toggled by `start_monitoring()` / `stop_monitoring()`.
    monitoring_active: Arc<RwLock<bool>>,
}

225impl AutoTuner {
226    /// Create a new auto-tuner with default configuration
227    pub fn new() -> Self {
228        Self::with_config(AutoTunerConfig::default())
229    }
230
231    /// Create a new auto-tuner with custom configuration
232    pub fn with_config(config: AutoTunerConfig) -> Self {
233        Self {
234            config,
235            system_resources: None,
236            workload_profile: WorkloadProfile::default(),
237            stats: Arc::new(RwLock::new(AutoTunerStats::default())),
238            monitoring_active: Arc::new(RwLock::new(false)),
239        }
240    }
241
242    /// Analyze system resources
243    pub async fn analyze_system(&mut self) -> Result<SystemResources, AutoTunerError> {
244        let resources = SystemResources::detect()?;
245        self.system_resources = Some(resources.clone());
246
247        let mut stats = self.stats.write();
248        stats.resource_checks += 1;
249
250        Ok(resources)
251    }
252
253    /// Generate optimized network configuration based on detected resources
254    pub async fn generate_config(&mut self) -> Result<NetworkConfig, AutoTunerError> {
255        // Ensure we have system resources
256        if self.system_resources.is_none() {
257            self.analyze_system().await?;
258        }
259
260        let resources = self.system_resources.as_ref().unwrap();
261        let usable_factor = 1.0 - self.config.safety_margin;
262
263        // Determine appropriate preset based on resources
264        let mut config = match resources.memory_category() {
265            "very_low" => NetworkConfig::low_memory(),
266            "low" => NetworkConfig::iot(),
267            "medium" => NetworkConfig::mobile(),
268            "high" | "very_high" => {
269                if resources.is_battery_powered {
270                    NetworkConfig::mobile()
271                } else {
272                    NetworkConfig::high_performance()
273                }
274            }
275            _ => NetworkConfig::default(),
276        };
277
278        // Adjust connection limits based on CPU cores
279        let base_connections = resources.cpu_cores * 50;
280        config.max_connections = Some((base_connections as f64 * usable_factor) as usize);
281
282        // Adjust memory-sensitive parameters
283        let memory_mb = resources.available_memory / (1024 * 1024);
284        if memory_mb < 256 {
285            config.connection_buffer_size = 8 * 1024; // 8 KB
286            config.max_connections = Some(16);
287        } else if memory_mb < 512 {
288            config.connection_buffer_size = 16 * 1024; // 16 KB
289            config.max_connections = Some(32);
290        }
291
292        // Enable NAT traversal unless it's a high-performance server
293        config.enable_nat_traversal =
294            resources.memory_category() != "very_high" || resources.is_battery_powered;
295
296        let mut stats = self.stats.write();
297        stats.adjustments_made += 1;
298        stats.last_adjustment = Some(Instant::now());
299        stats.optimization_score = self.calculate_optimization_score();
300
301        Ok(config)
302    }
303
304    /// Update workload profile based on observed metrics
305    pub fn update_workload(
306        &mut self,
307        connections: usize,
308        query_rate: f64,
309        bandwidth_usage: f64,
310        memory_usage: u64,
311    ) {
312        // Use exponential moving average for smoothing
313        let alpha = 0.3; // Smoothing factor
314
315        let profile = &mut self.workload_profile;
316        profile.avg_connections =
317            profile.avg_connections * (1.0 - alpha) + (connections as f64) * alpha;
318        profile.avg_query_rate = profile.avg_query_rate * (1.0 - alpha) + query_rate * alpha;
319        profile.avg_bandwidth_usage =
320            profile.avg_bandwidth_usage * (1.0 - alpha) + bandwidth_usage * alpha;
321        profile.peak_memory_usage = profile.peak_memory_usage.max(memory_usage);
322
323        // Detect bottlenecks
324        if let Some(resources) = &self.system_resources {
325            let memory_usage_ratio = memory_usage as f64 / resources.available_memory as f64;
326            profile.memory_bound = memory_usage_ratio > 0.8;
327
328            // Simple heuristics for CPU and bandwidth bounds
329            profile.cpu_bound = connections > resources.cpu_cores * 100;
330            profile.bandwidth_bound = resources.network_bandwidth > 0
331                && bandwidth_usage > (resources.network_bandwidth as f64 * 0.8);
332        }
333
334        let mut stats = self.stats.write();
335        stats.workload_checks += 1;
336    }
337
338    /// Start continuous monitoring and auto-adjustment
339    pub async fn start_monitoring(&mut self) -> Result<(), AutoTunerError> {
340        let mut active = self.monitoring_active.write();
341        if *active {
342            return Err(AutoTunerError::MonitoringActive);
343        }
344        *active = true;
345
346        Ok(())
347    }
348
349    /// Stop continuous monitoring
350    pub fn stop_monitoring(&mut self) {
351        let mut active = self.monitoring_active.write();
352        *active = false;
353    }
354
355    /// Check if monitoring is active
356    pub fn is_monitoring(&self) -> bool {
357        *self.monitoring_active.read()
358    }
359
360    /// Get current workload profile
361    pub fn workload_profile(&self) -> &WorkloadProfile {
362        &self.workload_profile
363    }
364
365    /// Get current statistics
366    pub fn stats(&self) -> AutoTunerStats {
367        self.stats.read().clone()
368    }
369
370    /// Calculate optimization score based on current state
371    fn calculate_optimization_score(&self) -> f64 {
372        if self.system_resources.is_none() {
373            return 0.0;
374        }
375
376        let resources = self.system_resources.as_ref().unwrap();
377        let profile = &self.workload_profile;
378
379        // Score based on resource utilization efficiency
380        let memory_score = if profile.peak_memory_usage > 0 {
381            1.0 - (profile.peak_memory_usage as f64 / resources.available_memory as f64).min(1.0)
382        } else {
383            0.5
384        };
385
386        let cpu_score = if profile.cpu_bound { 0.3 } else { 0.8 };
387        let bandwidth_score = if profile.bandwidth_bound { 0.3 } else { 0.8 };
388
389        // Weighted average
390        (memory_score * 0.4 + cpu_score * 0.3 + bandwidth_score * 0.3).clamp(0.0, 1.0)
391    }
392
393    /// Generate recommendations for manual tuning
394    pub fn recommendations(&self) -> Vec<String> {
395        let mut recommendations = Vec::new();
396
397        if let Some(resources) = &self.system_resources {
398            let profile = &self.workload_profile;
399
400            if profile.memory_bound {
401                recommendations.push(
402                    "Memory usage is high. Consider reducing max_connections or enabling low_memory_mode.".to_string()
403                );
404            }
405
406            if profile.cpu_bound {
407                recommendations.push(
408                    format!("CPU usage is high with {} cores. Consider distributing load across more nodes.",
409                        resources.cpu_cores)
410                );
411            }
412
413            if profile.bandwidth_bound {
414                recommendations.push(
415                    "Bandwidth is saturated. Consider enabling bandwidth throttling or upgrading network capacity.".to_string()
416                );
417            }
418
419            if resources.is_battery_powered && profile.avg_query_rate > 10.0 {
420                recommendations.push(
421                    "High DHT query rate on battery power. Consider enabling query batching."
422                        .to_string(),
423                );
424            }
425
426            if resources.memory_category() == "very_low" && !profile.memory_bound {
427                recommendations.push(
428                    "System resources are underutilized. You can increase max_connections for better performance.".to_string()
429                );
430            }
431        } else {
432            recommendations.push("Run analyze_system() first to get recommendations.".to_string());
433        }
434
435        recommendations
436    }
437}
438
impl Default for AutoTuner {
    // Delegates to `new()`, i.e. `AutoTunerConfig::default()` settings.
    fn default() -> Self {
        Self::new()
    }
}

// In-crate stand-in for the `num_cpus` crate's API, built on std only.
mod num_cpus {
    /// Best-effort logical CPU count; falls back to 4 when the platform
    /// cannot report parallelism.
    pub fn get() -> usize {
        match std::thread::available_parallelism() {
            Ok(count) => count.get(),
            Err(_) => 4,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // A fresh tuner must start with monitoring inactive.
    #[tokio::test]
    async fn test_auto_tuner_creation() {
        let tuner = AutoTuner::new();
        assert!(!tuner.is_monitoring());
    }

    // detect() always reports at least one core and nonzero total memory.
    #[tokio::test]
    async fn test_system_resource_detection() {
        let mut tuner = AutoTuner::new();
        let resources = tuner.analyze_system().await.unwrap();
        assert!(resources.cpu_cores > 0);
        assert!(resources.total_memory > 0);
    }

    // generate_config() must always set an explicit connection limit.
    #[tokio::test]
    async fn test_config_generation() {
        let mut tuner = AutoTuner::new();
        let config = tuner.generate_config().await.unwrap();
        assert!(config.max_connections.is_some());
    }

    // Feeding one observation moves the EMA off its zero starting point.
    #[tokio::test]
    async fn test_workload_update() {
        let mut tuner = AutoTuner::new();
        tuner.analyze_system().await.unwrap();

        tuner.update_workload(10, 5.0, 100_000.0, 50_000_000);
        let profile = tuner.workload_profile();
        assert!(profile.avg_connections > 0.0);
    }

    // start/stop toggle the shared monitoring flag, in order.
    #[tokio::test]
    async fn test_monitoring_lifecycle() {
        let mut tuner = AutoTuner::new();
        assert!(!tuner.is_monitoring());

        tuner.start_monitoring().await.unwrap();
        assert!(tuner.is_monitoring());

        tuner.stop_monitoring();
        assert!(!tuner.is_monitoring());
    }

    // Spot checks for the coarse memory buckets (100 MB and 16 GB).
    #[test]
    fn test_memory_categories() {
        let low = SystemResources {
            total_memory: 100 * 1024 * 1024, // 100 MB
            available_memory: 50 * 1024 * 1024,
            cpu_cores: 2,
            network_bandwidth: 0,
            is_battery_powered: true,
        };
        assert_eq!(low.memory_category(), "very_low");

        let high = SystemResources {
            total_memory: 16 * 1024 * 1024 * 1024, // 16 GB
            available_memory: 8 * 1024 * 1024 * 1024,
            cpu_cores: 8,
            network_bandwidth: 0,
            is_battery_powered: false,
        };
        assert_eq!(high.memory_category(), "very_high");
    }

    // generate_config() bumps adjustments_made and records a timestamp.
    #[tokio::test]
    async fn test_statistics_tracking() {
        let mut tuner = AutoTuner::new();

        let stats_before = tuner.stats();
        assert_eq!(stats_before.adjustments_made, 0);

        tuner.generate_config().await.unwrap();

        let stats_after = tuner.stats();
        assert_eq!(stats_after.adjustments_made, 1);
        assert!(stats_after.last_adjustment.is_some());
    }

    // High simulated memory pressure should yield at least one suggestion.
    #[tokio::test]
    async fn test_recommendations() {
        let mut tuner = AutoTuner::new();
        tuner.analyze_system().await.unwrap();

        // Simulate high memory usage to trigger a recommendation
        if let Some(resources) = &tuner.system_resources {
            let high_memory = (resources.available_memory as f64 * 0.85) as u64;
            tuner.update_workload(50, 20.0, 1_000_000.0, high_memory);
        }

        let recommendations = tuner.recommendations();
        assert!(!recommendations.is_empty());
    }

    // Preset constructors sit on opposite sides of the default 0.2 margin.
    #[tokio::test]
    async fn test_config_presets() {
        let conservative = AutoTunerConfig::conservative();
        assert!(!conservative.aggressive_mode);
        assert!(conservative.safety_margin > 0.2);

        let aggressive = AutoTunerConfig::aggressive();
        assert!(aggressive.aggressive_mode);
        assert!(aggressive.safety_margin < 0.2);
    }

    // The score stays within its documented [0.0, 1.0] range.
    #[tokio::test]
    async fn test_optimization_score() {
        let mut tuner = AutoTuner::new();
        tuner.analyze_system().await.unwrap();

        let stats = tuner.stats();
        assert!(stats.optimization_score >= 0.0 && stats.optimization_score <= 1.0);
    }
}