1use parking_lot::RwLock;
23use std::collections::VecDeque;
24use std::sync::Arc;
25use std::time::{Duration, Instant};
26use thiserror::Error;
27
28#[derive(Debug, Error)]
30pub enum ProfilerError {
31 #[error("Profiler not started")]
32 NotStarted,
33
34 #[error("System information unavailable")]
35 SystemInfoUnavailable,
36
37 #[error("Insufficient samples for analysis")]
38 InsufficientSamples,
39}
40
/// Class of device the profiler is running on.
///
/// The enum carries no data, so it is `Copy` (no `.clone()` needed to pass it around) and
/// `Hash` (usable as a `HashMap`/`HashSet` key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArmDevice {
    /// A Raspberry Pi board.
    RaspberryPi,
    /// An NVIDIA Jetson board.
    Jetson,
    /// An ARM device that is not one of the specifically recognised boards.
    Generic,
    /// The device could not be classified.
    Unknown,
}
53
54impl ArmDevice {
55 pub fn detect() -> Self {
57 #[cfg(target_arch = "aarch64")]
59 {
60 Self::Generic
61 }
62 #[cfg(target_arch = "arm")]
63 {
64 Self::RaspberryPi
65 }
66 #[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
67 {
68 Self::Unknown
69 }
70 }
71
72 pub fn recommended_config(&self) -> ProfilerConfig {
74 match self {
75 ArmDevice::RaspberryPi => ProfilerConfig::raspberry_pi(),
76 ArmDevice::Jetson => ProfilerConfig::jetson(),
77 ArmDevice::Generic => ProfilerConfig::default(),
78 ArmDevice::Unknown => ProfilerConfig::default(),
79 }
80 }
81}
82
/// Settings that control what the profiler records and how much history it keeps.
#[derive(Clone, Debug)]
pub struct ProfilerConfig {
    /// When `false`, `ArmProfiler::record_cpu` discards its input.
    pub track_cpu: bool,

    /// When `false`, `ArmProfiler::record_memory` discards its input.
    pub track_memory: bool,

    /// When `false`, `ArmProfiler::record_throughput` discards its input.
    pub track_throughput: bool,

    /// When `false`, `ArmProfiler::record_latency` discards its input.
    pub track_latency: bool,

    /// Desired spacing between samples.
    ///
    /// NOTE(review): nothing in this file reads this value; presumably the caller's sampling
    /// loop is expected to honour it. Confirm against callers.
    pub sample_interval: Duration,

    /// Upper bound on retained samples; the oldest samples are evicted beyond this count.
    pub max_samples: usize,

    /// Whether temperature should be tracked. Off by default, on in the Raspberry Pi and
    /// Jetson presets.
    pub track_thermal: bool,
}
107
108impl Default for ProfilerConfig {
109 fn default() -> Self {
110 Self {
111 track_cpu: true,
112 track_memory: true,
113 track_throughput: true,
114 track_latency: true,
115 sample_interval: Duration::from_secs(1),
116 max_samples: 1000,
117 track_thermal: false,
118 }
119 }
120}
121
122impl ProfilerConfig {
123 pub fn raspberry_pi() -> Self {
125 Self {
126 track_cpu: true,
127 track_memory: true,
128 track_throughput: true,
129 track_latency: true,
130 sample_interval: Duration::from_secs(2),
131 max_samples: 500,
132 track_thermal: true, }
134 }
135
136 pub fn jetson() -> Self {
138 Self {
139 track_cpu: true,
140 track_memory: true,
141 track_throughput: true,
142 track_latency: true,
143 sample_interval: Duration::from_millis(500),
144 max_samples: 2000, track_thermal: true,
146 }
147 }
148}
149
/// One recorded observation. Each metric is optional; the `record_*` helpers on
/// `ArmProfiler` fill in exactly one metric per sample.
#[derive(Clone, Debug)]
pub struct PerformanceSample {
    /// Moment at which the sample was created.
    pub timestamp: Instant,
    /// CPU usage reading. NOTE(review): the scale (e.g. percent) is not fixed by this file.
    pub cpu_usage: Option<f64>,
    /// Memory usage in bytes.
    pub memory_usage: Option<u64>,
    /// Throughput in bytes per second.
    pub throughput: Option<u64>,
    /// Latency in microseconds.
    pub latency_us: Option<u64>,
    /// Temperature reading. NOTE(review): the unit is not fixed by this file.
    pub temperature: Option<f32>,
}
166
/// Aggregate statistics produced by `ArmProfiler::stats` over the retained samples.
///
/// Each metric is aggregated only over the samples that carry it.
#[derive(Clone, Debug)]
pub struct PerformanceStats {
    /// Mean CPU usage; `0.0` when no sample carried a CPU reading.
    pub avg_cpu: f64,
    /// Highest CPU usage; `0.0` when no sample carried a CPU reading.
    pub peak_cpu: f64,
    /// Mean memory usage in bytes; `0` when no sample carried a memory reading.
    pub avg_memory: u64,
    /// Highest memory usage in bytes; `0` when no sample carried a memory reading.
    pub peak_memory: u64,
    /// Mean throughput in bytes per second; `0` when no sample carried a throughput reading.
    pub avg_throughput: u64,
    /// Highest throughput in bytes per second; `0` when no sample carried a throughput reading.
    pub peak_throughput: u64,
    /// Mean latency in microseconds; `0` when no sample carried a latency reading.
    pub avg_latency: u64,
    /// 95th-percentile latency in microseconds; `0` when no sample carried a latency reading.
    pub p95_latency: u64,
    /// 99th-percentile latency in microseconds; `0` when no sample carried a latency reading.
    pub p99_latency: u64,
    /// Mean temperature; `None` when no sample carried a temperature reading.
    pub avg_temperature: Option<f32>,
    /// Peak temperature; `None` when no sample carried a temperature reading.
    pub peak_temperature: Option<f32>,
    /// Number of samples that were in the buffer when the statistics were computed.
    pub sample_count: usize,
    /// Time elapsed since the profiler was started; zero when it is not running.
    pub duration: Duration,
}
197
198pub struct ArmProfiler {
200 config: ProfilerConfig,
202 device: ArmDevice,
204 samples: Arc<RwLock<VecDeque<PerformanceSample>>>,
206 start_time: Option<Instant>,
208 last_sample: Arc<RwLock<Option<Instant>>>,
210}
211
212impl ArmProfiler {
213 pub fn new(config: ProfilerConfig) -> Self {
215 let device = ArmDevice::detect();
216 Self {
217 config,
218 device,
219 samples: Arc::new(RwLock::new(VecDeque::new())),
220 start_time: None,
221 last_sample: Arc::new(RwLock::new(None)),
222 }
223 }
224
225 pub fn auto_detect() -> Self {
227 let device = ArmDevice::detect();
228 let config = device.recommended_config();
229 Self::new(config)
230 }
231
232 pub fn start(&mut self) {
234 self.start_time = Some(Instant::now());
235 *self.last_sample.write() = Some(Instant::now());
236 }
237
238 pub fn stop(&mut self) {
240 self.start_time = None;
241 }
242
243 pub fn record_sample(&self, sample: PerformanceSample) {
245 let mut samples = self.samples.write();
246
247 samples.push_back(sample);
249
250 while samples.len() > self.config.max_samples {
252 samples.pop_front();
253 }
254
255 *self.last_sample.write() = Some(Instant::now());
257 }
258
259 pub fn record_cpu(&self, cpu_usage: f64) {
261 if !self.config.track_cpu {
262 return;
263 }
264
265 let sample = PerformanceSample {
266 timestamp: Instant::now(),
267 cpu_usage: Some(cpu_usage),
268 memory_usage: None,
269 throughput: None,
270 latency_us: None,
271 temperature: None,
272 };
273
274 self.record_sample(sample);
275 }
276
277 pub fn record_memory(&self, memory_bytes: u64) {
279 if !self.config.track_memory {
280 return;
281 }
282
283 let sample = PerformanceSample {
284 timestamp: Instant::now(),
285 cpu_usage: None,
286 memory_usage: Some(memory_bytes),
287 throughput: None,
288 latency_us: None,
289 temperature: None,
290 };
291
292 self.record_sample(sample);
293 }
294
295 pub fn record_throughput(&self, bytes_per_sec: u64) {
297 if !self.config.track_throughput {
298 return;
299 }
300
301 let sample = PerformanceSample {
302 timestamp: Instant::now(),
303 cpu_usage: None,
304 memory_usage: None,
305 throughput: Some(bytes_per_sec),
306 latency_us: None,
307 temperature: None,
308 };
309
310 self.record_sample(sample);
311 }
312
313 pub fn record_latency(&self, latency: Duration) {
315 if !self.config.track_latency {
316 return;
317 }
318
319 let sample = PerformanceSample {
320 timestamp: Instant::now(),
321 cpu_usage: None,
322 memory_usage: None,
323 throughput: None,
324 latency_us: Some(latency.as_micros() as u64),
325 temperature: None,
326 };
327
328 self.record_sample(sample);
329 }
330
331 pub fn stats(&self) -> Result<PerformanceStats, ProfilerError> {
333 let samples = self.samples.read();
334
335 if samples.is_empty() {
336 return Err(ProfilerError::InsufficientSamples);
337 }
338
339 let duration = self
340 .start_time
341 .map(|start| start.elapsed())
342 .unwrap_or_default();
343
344 let cpu_values: Vec<f64> = samples.iter().filter_map(|s| s.cpu_usage).collect();
346
347 let avg_cpu = if !cpu_values.is_empty() {
348 cpu_values.iter().sum::<f64>() / cpu_values.len() as f64
349 } else {
350 0.0
351 };
352
353 let peak_cpu = cpu_values.iter().cloned().fold(0.0f64, |a, b| a.max(b));
354
355 let memory_values: Vec<u64> = samples.iter().filter_map(|s| s.memory_usage).collect();
357
358 let avg_memory = if !memory_values.is_empty() {
359 memory_values.iter().sum::<u64>() / memory_values.len() as u64
360 } else {
361 0
362 };
363
364 let peak_memory = memory_values.iter().cloned().max().unwrap_or(0);
365
366 let throughput_values: Vec<u64> = samples.iter().filter_map(|s| s.throughput).collect();
368
369 let avg_throughput = if !throughput_values.is_empty() {
370 throughput_values.iter().sum::<u64>() / throughput_values.len() as u64
371 } else {
372 0
373 };
374
375 let peak_throughput = throughput_values.iter().cloned().max().unwrap_or(0);
376
377 let mut latency_values: Vec<u64> = samples.iter().filter_map(|s| s.latency_us).collect();
379
380 latency_values.sort_unstable();
381
382 let avg_latency = if !latency_values.is_empty() {
383 latency_values.iter().sum::<u64>() / latency_values.len() as u64
384 } else {
385 0
386 };
387
388 let p95_latency = if !latency_values.is_empty() {
389 let idx = (latency_values.len() as f64 * 0.95) as usize;
390 latency_values.get(idx).cloned().unwrap_or(0)
391 } else {
392 0
393 };
394
395 let p99_latency = if !latency_values.is_empty() {
396 let idx = (latency_values.len() as f64 * 0.99) as usize;
397 latency_values.get(idx).cloned().unwrap_or(0)
398 } else {
399 0
400 };
401
402 let temp_values: Vec<f32> = samples.iter().filter_map(|s| s.temperature).collect();
404
405 let avg_temperature = if !temp_values.is_empty() {
406 Some(temp_values.iter().sum::<f32>() / temp_values.len() as f32)
407 } else {
408 None
409 };
410
411 let peak_temperature = if !temp_values.is_empty() {
412 Some(temp_values.iter().cloned().fold(0.0f32, |a, b| a.max(b)))
413 } else {
414 None
415 };
416
417 Ok(PerformanceStats {
418 avg_cpu,
419 peak_cpu,
420 avg_memory,
421 peak_memory,
422 avg_throughput,
423 peak_throughput,
424 avg_latency,
425 p95_latency,
426 p99_latency,
427 avg_temperature,
428 peak_temperature,
429 sample_count: samples.len(),
430 duration,
431 })
432 }
433
434 pub fn device(&self) -> &ArmDevice {
436 &self.device
437 }
438
439 pub fn config(&self) -> &ProfilerConfig {
441 &self.config
442 }
443
444 pub fn clear(&self) {
446 self.samples.write().clear();
447 }
448
449 pub fn sample_count(&self) -> usize {
451 self.samples.read().len()
452 }
453}
454
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed profiler holds no samples.
    #[test]
    fn test_profiler_creation() {
        let profiler = ArmProfiler::new(ProfilerConfig::default());

        assert_eq!(profiler.sample_count(), 0);
    }

    /// Auto-detection also yields an empty profiler.
    #[test]
    fn test_auto_detect() {
        let profiler = ArmProfiler::auto_detect();

        assert_eq!(profiler.sample_count(), 0);
    }

    #[test]
    fn test_record_cpu() {
        let profiler = ArmProfiler::auto_detect();
        profiler.record_cpu(50.0);

        assert_eq!(profiler.sample_count(), 1);
    }

    #[test]
    fn test_record_memory() {
        let profiler = ArmProfiler::auto_detect();
        profiler.record_memory(1024 * 1024);

        assert_eq!(profiler.sample_count(), 1);
    }

    #[test]
    fn test_record_throughput() {
        let profiler = ArmProfiler::auto_detect();
        profiler.record_throughput(1_000_000);

        assert_eq!(profiler.sample_count(), 1);
    }

    #[test]
    fn test_record_latency() {
        let profiler = ArmProfiler::auto_detect();
        profiler.record_latency(Duration::from_millis(10));

        assert_eq!(profiler.sample_count(), 1);
    }

    /// Recorders whose metric is disabled in the configuration must not store anything.
    #[test]
    fn test_disabled_metrics_are_not_recorded() {
        let config = ProfilerConfig {
            track_cpu: false,
            track_memory: false,
            track_throughput: false,
            track_latency: false,
            ..Default::default()
        };
        let profiler = ArmProfiler::new(config);

        profiler.record_cpu(50.0);
        profiler.record_memory(1024);
        profiler.record_throughput(1_000_000);
        profiler.record_latency(Duration::from_millis(10));

        assert_eq!(profiler.sample_count(), 0);
        assert!(matches!(
            profiler.stats(),
            Err(ProfilerError::InsufficientSamples)
        ));
    }

    /// Averages and peaks are computed per metric, ignoring samples that lack it.
    #[test]
    fn test_stats_calculation() {
        let profiler = ArmProfiler::auto_detect();

        for cpu in [30.0, 50.0, 70.0].iter() {
            profiler.record_cpu(*cpu);
        }
        for bytes in [1024u64, 2048, 3072].iter() {
            profiler.record_memory(*bytes);
        }

        let stats = profiler.stats().unwrap();

        assert_eq!(stats.avg_cpu, 50.0);
        assert_eq!(stats.peak_cpu, 70.0);
        assert_eq!(stats.avg_memory, 2048);
        assert_eq!(stats.peak_memory, 3072);
        assert_eq!(stats.sample_count, 6);
        assert_eq!(stats.avg_temperature, None);
        assert_eq!(stats.peak_temperature, None);
    }

    /// Without a call to `start`, the reported duration is zero.
    #[test]
    fn test_duration_is_zero_when_not_started() {
        let profiler = ArmProfiler::auto_detect();
        profiler.record_cpu(10.0);

        let stats = profiler.stats().unwrap();

        assert_eq!(stats.duration, Duration::default());
    }

    #[test]
    fn test_latency_percentiles() {
        let profiler = ArmProfiler::auto_detect();

        // Latencies of 10, 20, ..., 1000 microseconds.
        for i in 1..=100u64 {
            profiler.record_latency(Duration::from_micros(i * 10));
        }

        let stats = profiler.stats().unwrap();

        assert_eq!(stats.avg_latency, 505);
        assert!(stats.p95_latency > stats.avg_latency);
        assert!(stats.p99_latency > stats.p95_latency);
        assert!(stats.p99_latency <= 1000);
    }

    /// The buffer is bounded and evicts the oldest samples first.
    #[test]
    fn test_max_samples_limit() {
        let config = ProfilerConfig {
            max_samples: 10,
            ..Default::default()
        };
        let profiler = ArmProfiler::new(config);

        for i in 0..20 {
            profiler.record_cpu(i as f64);
        }

        assert_eq!(profiler.sample_count(), 10);

        // Only readings 10..=19 should remain.
        let stats = profiler.stats().unwrap();
        assert_eq!(stats.avg_cpu, 14.5);
        assert_eq!(stats.peak_cpu, 19.0);
    }

    #[test]
    fn test_clear_samples() {
        let profiler = ArmProfiler::auto_detect();

        profiler.record_cpu(50.0);
        profiler.record_cpu(60.0);
        assert_eq!(profiler.sample_count(), 2);

        profiler.clear();
        assert_eq!(profiler.sample_count(), 0);
    }

    /// The Raspberry Pi preset samples less often and keeps less history than the Jetson one.
    #[test]
    fn test_device_configs() {
        let rpi_config = ProfilerConfig::raspberry_pi();
        let jetson_config = ProfilerConfig::jetson();

        assert!(rpi_config.sample_interval > jetson_config.sample_interval);
        assert!(rpi_config.max_samples < jetson_config.max_samples);
        assert!(rpi_config.track_thermal);
        assert!(jetson_config.track_thermal);
    }

    #[test]
    fn test_insufficient_samples_error() {
        let profiler = ArmProfiler::auto_detect();
        let result = profiler.stats();

        assert!(matches!(result, Err(ProfilerError::InsufficientSamples)));
    }
}