ipfrs_network/
arm_profiler.rs

1//! ARM Performance Profiling for Network Operations
2//!
3//! This module provides performance profiling utilities specifically designed
4//! for ARM devices including Raspberry Pi, Jetson, and other embedded platforms.
5//!
6//! ## Features
7//!
8//! - CPU usage tracking
9//! - Memory usage monitoring
10//! - Network throughput measurement
11//! - Latency profiling
12//! - Battery/power consumption estimation
13//! - Thermal monitoring (on supported devices)
14//!
15//! ## Use Cases
16//!
17//! - Performance optimization for ARM devices
18//! - Identifying bottlenecks on resource-constrained devices
19//! - Regression testing across ARM platforms
20//! - Power consumption analysis
21
22use parking_lot::RwLock;
23use std::collections::VecDeque;
24use std::sync::Arc;
25use std::time::{Duration, Instant};
26use thiserror::Error;
27
28/// Errors that can occur during profiling
29#[derive(Debug, Error)]
30pub enum ProfilerError {
31    #[error("Profiler not started")]
32    NotStarted,
33
34    #[error("System information unavailable")]
35    SystemInfoUnavailable,
36
37    #[error("Insufficient samples for analysis")]
38    InsufficientSamples,
39}
40
/// ARM device profile.
///
/// This is a plain, field-less tag, so it is `Copy` (it can be passed around
/// by value without cloning) and `Hash` (it can key a map or set, e.g. to
/// group results per device family).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmDevice {
    /// Raspberry Pi (various models)
    RaspberryPi,
    /// NVIDIA Jetson (Nano, TX2, Xavier, etc.)
    Jetson,
    /// Generic ARM device
    Generic,
    /// Unknown device
    Unknown,
}
53
54impl ArmDevice {
55    /// Detect ARM device type from system information
56    pub fn detect() -> Self {
57        // In a real implementation, read /proc/cpuinfo or device tree
58        #[cfg(target_arch = "aarch64")]
59        {
60            Self::Generic
61        }
62        #[cfg(target_arch = "arm")]
63        {
64            Self::RaspberryPi
65        }
66        #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
67        {
68            Self::Unknown
69        }
70    }
71
72    /// Get recommended configuration for this device
73    pub fn recommended_config(&self) -> ProfilerConfig {
74        match self {
75            ArmDevice::RaspberryPi => ProfilerConfig::raspberry_pi(),
76            ArmDevice::Jetson => ProfilerConfig::jetson(),
77            ArmDevice::Generic => ProfilerConfig::default(),
78            ArmDevice::Unknown => ProfilerConfig::default(),
79        }
80    }
81}
82
/// Switches and limits that control what the profiler records.
#[derive(Clone, Debug)]
pub struct ProfilerConfig {
    /// Whether CPU usage tracking is enabled
    pub track_cpu: bool,

    /// Whether memory usage tracking is enabled
    pub track_memory: bool,

    /// Whether network throughput tracking is enabled
    pub track_throughput: bool,

    /// Whether latency profiling is enabled
    pub track_latency: bool,

    /// Interval between metric samples
    pub sample_interval: Duration,

    /// Upper bound on the number of retained samples
    pub max_samples: usize,

    /// Whether thermal monitoring is enabled (if supported)
    pub track_thermal: bool,
}

impl Default for ProfilerConfig {
    /// General-purpose settings: every tracker except thermal is enabled,
    /// with a one-second interval and room for 1000 samples.
    fn default() -> Self {
        let sample_interval = Duration::from_secs(1);
        let max_samples = 1000;

        Self {
            track_cpu: true,
            track_memory: true,
            track_throughput: true,
            track_latency: true,
            sample_interval,
            max_samples,
            track_thermal: false,
        }
    }
}

impl ProfilerConfig {
    /// Preset for Raspberry Pi: slower sampling, fewer retained samples, and
    /// thermal monitoring switched on.
    pub fn raspberry_pi() -> Self {
        Self {
            sample_interval: Duration::from_secs(2),
            max_samples: 500,
            // RPi has temp sensor
            track_thermal: true,
            // The four tracker switches are all `true` in the default config.
            ..Self::default()
        }
    }

    /// Preset for NVIDIA Jetson: faster sampling, more retained samples, and
    /// thermal monitoring switched on.
    pub fn jetson() -> Self {
        Self {
            sample_interval: Duration::from_millis(500),
            // Jetson has more resources
            max_samples: 2000,
            track_thermal: true,
            // The four tracker switches are all `true` in the default config.
            ..Self::default()
        }
    }
}
149
/// One recorded measurement point.
///
/// Every metric is optional, so a sample may carry any subset of them.
#[derive(Clone, Debug)]
pub struct PerformanceSample {
    /// When the sample was taken
    pub timestamp: Instant,
    /// CPU usage as a percentage (0.0-100.0), if measured
    pub cpu_usage: Option<f64>,
    /// Memory usage in bytes, if measured
    pub memory_usage: Option<u64>,
    /// Network throughput in bytes per second, if measured
    pub throughput: Option<u64>,
    /// Average latency in microseconds, if measured
    pub latency_us: Option<u64>,
    /// Temperature in degrees Celsius, if available
    pub temperature: Option<f32>,
}
166
/// Aggregate figures computed over a set of recorded samples.
#[derive(Clone, Debug)]
pub struct PerformanceStats {
    /// Mean CPU usage
    pub avg_cpu: f64,
    /// Highest CPU usage
    pub peak_cpu: f64,
    /// Mean memory usage, in bytes
    pub avg_memory: u64,
    /// Highest memory usage, in bytes
    pub peak_memory: u64,
    /// Mean throughput, in bytes per second
    pub avg_throughput: u64,
    /// Highest throughput, in bytes per second
    pub peak_throughput: u64,
    /// Mean latency, in microseconds
    pub avg_latency: u64,
    /// 95th percentile latency, in microseconds
    pub p95_latency: u64,
    /// 99th percentile latency, in microseconds
    pub p99_latency: u64,
    /// Mean temperature in degrees Celsius, when any was recorded
    pub avg_temperature: Option<f32>,
    /// Highest temperature in degrees Celsius, when any was recorded
    pub peak_temperature: Option<f32>,
    /// How many samples the statistics cover
    pub sample_count: usize,
    /// How long profiling has been running
    pub duration: Duration,
}
197
198/// ARM performance profiler
199pub struct ArmProfiler {
200    /// Configuration
201    config: ProfilerConfig,
202    /// Device type
203    device: ArmDevice,
204    /// Performance samples
205    samples: Arc<RwLock<VecDeque<PerformanceSample>>>,
206    /// Start time
207    start_time: Option<Instant>,
208    /// Last sample time
209    last_sample: Arc<RwLock<Option<Instant>>>,
210}
211
212impl ArmProfiler {
213    /// Create a new ARM profiler
214    pub fn new(config: ProfilerConfig) -> Self {
215        let device = ArmDevice::detect();
216        Self {
217            config,
218            device,
219            samples: Arc::new(RwLock::new(VecDeque::new())),
220            start_time: None,
221            last_sample: Arc::new(RwLock::new(None)),
222        }
223    }
224
225    /// Create with auto-detected device configuration
226    pub fn auto_detect() -> Self {
227        let device = ArmDevice::detect();
228        let config = device.recommended_config();
229        Self::new(config)
230    }
231
232    /// Start profiling
233    pub fn start(&mut self) {
234        self.start_time = Some(Instant::now());
235        *self.last_sample.write() = Some(Instant::now());
236    }
237
238    /// Stop profiling
239    pub fn stop(&mut self) {
240        self.start_time = None;
241    }
242
243    /// Record a performance sample
244    pub fn record_sample(&self, sample: PerformanceSample) {
245        let mut samples = self.samples.write();
246
247        // Add new sample
248        samples.push_back(sample);
249
250        // Limit sample count
251        while samples.len() > self.config.max_samples {
252            samples.pop_front();
253        }
254
255        // Update last sample time
256        *self.last_sample.write() = Some(Instant::now());
257    }
258
259    /// Record CPU usage
260    pub fn record_cpu(&self, cpu_usage: f64) {
261        if !self.config.track_cpu {
262            return;
263        }
264
265        let sample = PerformanceSample {
266            timestamp: Instant::now(),
267            cpu_usage: Some(cpu_usage),
268            memory_usage: None,
269            throughput: None,
270            latency_us: None,
271            temperature: None,
272        };
273
274        self.record_sample(sample);
275    }
276
277    /// Record memory usage
278    pub fn record_memory(&self, memory_bytes: u64) {
279        if !self.config.track_memory {
280            return;
281        }
282
283        let sample = PerformanceSample {
284            timestamp: Instant::now(),
285            cpu_usage: None,
286            memory_usage: Some(memory_bytes),
287            throughput: None,
288            latency_us: None,
289            temperature: None,
290        };
291
292        self.record_sample(sample);
293    }
294
295    /// Record network throughput
296    pub fn record_throughput(&self, bytes_per_sec: u64) {
297        if !self.config.track_throughput {
298            return;
299        }
300
301        let sample = PerformanceSample {
302            timestamp: Instant::now(),
303            cpu_usage: None,
304            memory_usage: None,
305            throughput: Some(bytes_per_sec),
306            latency_us: None,
307            temperature: None,
308        };
309
310        self.record_sample(sample);
311    }
312
313    /// Record latency
314    pub fn record_latency(&self, latency: Duration) {
315        if !self.config.track_latency {
316            return;
317        }
318
319        let sample = PerformanceSample {
320            timestamp: Instant::now(),
321            cpu_usage: None,
322            memory_usage: None,
323            throughput: None,
324            latency_us: Some(latency.as_micros() as u64),
325            temperature: None,
326        };
327
328        self.record_sample(sample);
329    }
330
331    /// Get performance statistics
332    pub fn stats(&self) -> Result<PerformanceStats, ProfilerError> {
333        let samples = self.samples.read();
334
335        if samples.is_empty() {
336            return Err(ProfilerError::InsufficientSamples);
337        }
338
339        let duration = self
340            .start_time
341            .map(|start| start.elapsed())
342            .unwrap_or_default();
343
344        // Calculate CPU stats
345        let cpu_values: Vec<f64> = samples.iter().filter_map(|s| s.cpu_usage).collect();
346
347        let avg_cpu = if !cpu_values.is_empty() {
348            cpu_values.iter().sum::<f64>() / cpu_values.len() as f64
349        } else {
350            0.0
351        };
352
353        let peak_cpu = cpu_values.iter().cloned().fold(0.0f64, |a, b| a.max(b));
354
355        // Calculate memory stats
356        let memory_values: Vec<u64> = samples.iter().filter_map(|s| s.memory_usage).collect();
357
358        let avg_memory = if !memory_values.is_empty() {
359            memory_values.iter().sum::<u64>() / memory_values.len() as u64
360        } else {
361            0
362        };
363
364        let peak_memory = memory_values.iter().cloned().max().unwrap_or(0);
365
366        // Calculate throughput stats
367        let throughput_values: Vec<u64> = samples.iter().filter_map(|s| s.throughput).collect();
368
369        let avg_throughput = if !throughput_values.is_empty() {
370            throughput_values.iter().sum::<u64>() / throughput_values.len() as u64
371        } else {
372            0
373        };
374
375        let peak_throughput = throughput_values.iter().cloned().max().unwrap_or(0);
376
377        // Calculate latency stats
378        let mut latency_values: Vec<u64> = samples.iter().filter_map(|s| s.latency_us).collect();
379
380        latency_values.sort_unstable();
381
382        let avg_latency = if !latency_values.is_empty() {
383            latency_values.iter().sum::<u64>() / latency_values.len() as u64
384        } else {
385            0
386        };
387
388        let p95_latency = if !latency_values.is_empty() {
389            let idx = (latency_values.len() as f64 * 0.95) as usize;
390            latency_values.get(idx).cloned().unwrap_or(0)
391        } else {
392            0
393        };
394
395        let p99_latency = if !latency_values.is_empty() {
396            let idx = (latency_values.len() as f64 * 0.99) as usize;
397            latency_values.get(idx).cloned().unwrap_or(0)
398        } else {
399            0
400        };
401
402        // Calculate temperature stats
403        let temp_values: Vec<f32> = samples.iter().filter_map(|s| s.temperature).collect();
404
405        let avg_temperature = if !temp_values.is_empty() {
406            Some(temp_values.iter().sum::<f32>() / temp_values.len() as f32)
407        } else {
408            None
409        };
410
411        let peak_temperature = if !temp_values.is_empty() {
412            Some(temp_values.iter().cloned().fold(0.0f32, |a, b| a.max(b)))
413        } else {
414            None
415        };
416
417        Ok(PerformanceStats {
418            avg_cpu,
419            peak_cpu,
420            avg_memory,
421            peak_memory,
422            avg_throughput,
423            peak_throughput,
424            avg_latency,
425            p95_latency,
426            p99_latency,
427            avg_temperature,
428            peak_temperature,
429            sample_count: samples.len(),
430            duration,
431        })
432    }
433
434    /// Get the detected device type
435    pub fn device(&self) -> &ArmDevice {
436        &self.device
437    }
438
439    /// Get the configuration
440    pub fn config(&self) -> &ProfilerConfig {
441        &self.config
442    }
443
444    /// Clear all samples
445    pub fn clear(&self) {
446        self.samples.write().clear();
447    }
448
449    /// Get sample count
450    pub fn sample_count(&self) -> usize {
451        self.samples.read().len()
452    }
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// A profiler built from an explicit config starts out empty.
    #[test]
    fn test_profiler_creation() {
        let profiler = ArmProfiler::new(ProfilerConfig::default());

        assert_eq!(profiler.sample_count(), 0);
    }

    /// A profiler built via auto-detection starts out empty.
    #[test]
    fn test_auto_detect() {
        assert_eq!(ArmProfiler::auto_detect().sample_count(), 0);
    }

    /// One CPU reading produces exactly one sample.
    #[test]
    fn test_record_cpu() {
        let profiler = ArmProfiler::auto_detect();

        profiler.record_cpu(50.0);

        assert_eq!(profiler.sample_count(), 1);
    }

    /// One memory reading produces exactly one sample.
    #[test]
    fn test_record_memory() {
        let profiler = ArmProfiler::auto_detect();

        profiler.record_memory(1024 * 1024);

        assert_eq!(profiler.sample_count(), 1);
    }

    /// One throughput reading produces exactly one sample.
    #[test]
    fn test_record_throughput() {
        let profiler = ArmProfiler::auto_detect();

        profiler.record_throughput(1000000);

        assert_eq!(profiler.sample_count(), 1);
    }

    /// One latency reading produces exactly one sample.
    #[test]
    fn test_record_latency() {
        let profiler = ArmProfiler::auto_detect();

        profiler.record_latency(Duration::from_millis(10));

        assert_eq!(profiler.sample_count(), 1);
    }

    /// Averages and peaks are computed per metric.
    #[test]
    fn test_stats_calculation() {
        let profiler = ArmProfiler::auto_detect();

        for cpu in [30.0, 50.0, 70.0] {
            profiler.record_cpu(cpu);
        }
        for bytes in [1024, 2048, 3072] {
            profiler.record_memory(bytes);
        }

        let stats = profiler.stats().unwrap();

        assert_eq!(stats.avg_cpu, 50.0);
        assert_eq!(stats.peak_cpu, 70.0);
        assert_eq!(stats.avg_memory, 2048);
        assert_eq!(stats.peak_memory, 3072);
    }

    /// Percentiles are ordered: average < p95 < p99.
    #[test]
    fn test_latency_percentiles() {
        let profiler = ArmProfiler::auto_detect();

        // Latencies of 10, 20, ..., 1000 microseconds.
        (1..=100u64)
            .map(|step| Duration::from_micros(step * 10))
            .for_each(|latency| profiler.record_latency(latency));

        let stats = profiler.stats().unwrap();

        assert!(stats.avg_latency > 0);
        assert!(stats.p95_latency > stats.avg_latency);
        assert!(stats.p99_latency > stats.p95_latency);
    }

    /// The sample queue never grows past `max_samples`.
    #[test]
    fn test_max_samples_limit() {
        let profiler = ArmProfiler::new(ProfilerConfig {
            max_samples: 10,
            ..Default::default()
        });

        // Twice as many readings as the limit allows.
        (0..20).for_each(|i| profiler.record_cpu(f64::from(i)));

        // Only the most recent 10 are retained.
        assert_eq!(profiler.sample_count(), 10);
    }

    /// `clear` removes every recorded sample.
    #[test]
    fn test_clear_samples() {
        let profiler = ArmProfiler::auto_detect();

        for cpu in [50.0, 60.0] {
            profiler.record_cpu(cpu);
        }
        assert_eq!(profiler.sample_count(), 2);

        profiler.clear();
        assert_eq!(profiler.sample_count(), 0);
    }

    /// The Raspberry Pi preset samples less often and keeps fewer samples
    /// than the Jetson preset.
    #[test]
    fn test_device_configs() {
        let (rpi_config, jetson_config) = (ProfilerConfig::raspberry_pi(), ProfilerConfig::jetson());

        assert!(rpi_config.sample_interval > jetson_config.sample_interval);
        assert!(rpi_config.max_samples < jetson_config.max_samples);
    }

    /// Asking for statistics with no samples is an error.
    #[test]
    fn test_insufficient_samples_error() {
        let profiler = ArmProfiler::auto_detect();

        assert!(matches!(
            profiler.stats(),
            Err(ProfilerError::InsufficientSamples)
        ));
    }
}