Skip to main content

scirs2_core/profiling/
hardware_counters.rs

1//! # Hardware Performance Counter Integration
2//!
3//! This module provides integration with hardware performance counters for detailed
4//! performance analysis including CPU cycles, cache misses, branch predictions, and more.
5
6use crate::error::{CoreError, CoreResult};
7use std::collections::HashMap;
8use std::sync::{Arc, Mutex, RwLock};
9use std::time::Instant;
10use thiserror::Error;
11
12/// Error types for hardware performance counters
13#[derive(Error, Debug)]
14pub enum HardwareCounterError {
15    /// Performance counters not available on this platform
16    #[error("Performance counters not available on this platform")]
17    NotAvailable,
18
19    /// Permission denied to access performance counters
20    #[error("Permission denied to access performance counters: {0}")]
21    PermissionDenied(String),
22
23    /// Counter not found
24    #[error("Performance counter not found: {0}")]
25    CounterNotFound(String),
26
27    /// Invalid counter configuration
28    #[error("Invalid counter configuration: {0}")]
29    InvalidConfiguration(String),
30
31    /// System error
32    #[error("System error: {0}")]
33    SystemError(String),
34}
35
36impl From<HardwareCounterError> for CoreError {
37    fn from(err: HardwareCounterError) -> Self {
38        CoreError::ComputationError(crate::error::ErrorContext::new(err.to_string()))
39    }
40}
41
42/// Hardware performance counter types
43#[derive(Debug, Clone, PartialEq, Eq, Hash)]
44pub enum CounterType {
45    // CPU Counters
46    /// CPU cycles
47    CpuCycles,
48    /// Instructions retired
49    Instructions,
50    /// Cache references
51    CacheReferences,
52    /// Cache misses
53    CacheMisses,
54    /// Branch instructions
55    BranchInstructions,
56    /// Branch mispredictions
57    BranchMisses,
58    /// Bus cycles
59    BusCycles,
60    /// Stalled cycles frontend
61    StalledCyclesFrontend,
62    /// Stalled cycles backend
63    StalledCyclesBackend,
64
65    // L1 Cache Counters
66    /// L1 data cache loads
67    L1DCacheLoads,
68    /// L1 data cache load misses
69    L1DCacheLoadMisses,
70    /// L1 data cache stores
71    L1DCacheStores,
72    /// L1 instruction cache loads
73    L1ICacheLoads,
74    /// L1 instruction cache load misses
75    L1ICacheLoadMisses,
76
77    // L2/L3 Cache Counters
78    /// L2 cache loads
79    L2CacheLoads,
80    /// L2 cache load misses
81    L2CacheLoadMisses,
82    /// L3 cache loads
83    L3CacheLoads,
84    /// L3 cache load misses
85    L3CacheLoadMisses,
86
87    // Memory Counters
88    /// DTLB loads
89    DtlbLoads,
90    /// DTLB load misses
91    DtlbLoadMisses,
92    /// ITLB loads
93    ItlbLoads,
94    /// ITLB load misses
95    ItlbLoadMisses,
96
97    // Power/Thermal Counters
98    /// CPU power consumption
99    CpuPower,
100    /// CPU temperature
101    CpuTemperature,
102    /// CPU frequency
103    CpuFrequency,
104
105    // Custom counter for platform-specific counters
106    Custom(String),
107}
108
109impl CounterType {
110    /// Get a human-readable description of the counter
111    pub const fn description(&self) -> &'static str {
112        match self {
113            CounterType::CpuCycles => "CPU cycles",
114            CounterType::Instructions => "Instructions retired",
115            CounterType::CacheReferences => "Cache references",
116            CounterType::CacheMisses => "Cache misses",
117            CounterType::BranchInstructions => "Branch instructions",
118            CounterType::BranchMisses => "Branch mispredictions",
119            CounterType::BusCycles => "Bus cycles",
120            CounterType::StalledCyclesFrontend => "Stalled cycles frontend",
121            CounterType::StalledCyclesBackend => "Stalled cycles backend",
122            CounterType::L1DCacheLoads => "L1 data cache loads",
123            CounterType::L1DCacheLoadMisses => "L1 data cache load misses",
124            CounterType::L1DCacheStores => "L1 data cache stores",
125            CounterType::L1ICacheLoads => "L1 instruction cache loads",
126            CounterType::L1ICacheLoadMisses => "L1 instruction cache load misses",
127            CounterType::L2CacheLoads => "L2 cache loads",
128            CounterType::L2CacheLoadMisses => "L2 cache load misses",
129            CounterType::L3CacheLoads => "L3 cache loads",
130            CounterType::L3CacheLoadMisses => "L3 cache load misses",
131            CounterType::DtlbLoads => "Data TLB loads",
132            CounterType::DtlbLoadMisses => "Data TLB load misses",
133            CounterType::ItlbLoads => "Instruction TLB loads",
134            CounterType::ItlbLoadMisses => "Instruction TLB load misses",
135            CounterType::CpuPower => "CPU power consumption",
136            CounterType::CpuTemperature => "CPU temperature",
137            CounterType::CpuFrequency => "CPU frequency",
138            CounterType::Custom(_) => "Custom counter",
139        }
140    }
141
142    /// Get the unit for this counter type
143    pub const fn unit(&self) -> &'static str {
144        match self {
145            CounterType::CpuCycles
146            | CounterType::Instructions
147            | CounterType::CacheReferences
148            | CounterType::CacheMisses
149            | CounterType::BranchInstructions
150            | CounterType::BranchMisses
151            | CounterType::BusCycles
152            | CounterType::StalledCyclesFrontend
153            | CounterType::StalledCyclesBackend
154            | CounterType::L1DCacheLoads
155            | CounterType::L1DCacheLoadMisses
156            | CounterType::L1DCacheStores
157            | CounterType::L1ICacheLoads
158            | CounterType::L1ICacheLoadMisses
159            | CounterType::L2CacheLoads
160            | CounterType::L2CacheLoadMisses
161            | CounterType::L3CacheLoads
162            | CounterType::L3CacheLoadMisses
163            | CounterType::DtlbLoads
164            | CounterType::DtlbLoadMisses
165            | CounterType::ItlbLoads
166            | CounterType::ItlbLoadMisses => "count",
167            CounterType::CpuPower => "watts",
168            CounterType::CpuTemperature => "celsius",
169            CounterType::CpuFrequency => "hertz",
170            CounterType::Custom(_) => "unknown",
171        }
172    }
173}
174
175/// Performance counter value with metadata
176#[derive(Debug, Clone)]
177pub struct CounterValue {
178    /// Counter type
179    pub countertype: CounterType,
180    /// Raw counter value
181    pub value: u64,
182    /// Timestamp when value was read
183    pub timestamp: Instant,
184    /// Whether the counter is running
185    pub enabled: bool,
186    /// Counter scaling factor (for normalized values)
187    pub scaling_factor: f64,
188}
189
190impl CounterValue {
191    /// Create a new counter value
192    pub fn new(countertype: CounterType, value: u64) -> Self {
193        Self {
194            countertype,
195            value,
196            timestamp: Instant::now(),
197            enabled: true,
198            scaling_factor: 1.0,
199        }
200    }
201
202    /// Get the scaled value
203    pub fn scaled_value(&self) -> f64 {
204        self.value as f64 * self.scaling_factor
205    }
206}
207
208/// Hardware performance counter interface
209pub trait PerformanceCounter: Send + Sync {
210    /// Get available counter types on this platform
211    fn available_counters(&self) -> Vec<CounterType>;
212
213    /// Check if a counter type is available
214    fn is_available(&self, countertype: &CounterType) -> bool;
215
216    /// Start monitoring a counter
217    fn start_counter(&self, countertype: &CounterType) -> CoreResult<()>;
218
219    /// Stop monitoring a counter
220    fn stop_counter(&self, countertype: &CounterType) -> CoreResult<()>;
221
222    /// Read current value of a counter
223    fn read_counter(&self, countertype: &CounterType) -> CoreResult<CounterValue>;
224
225    /// Read multiple counters atomically
226    fn read_counters(&self, countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>>;
227
228    /// Reset a counter to zero
229    fn reset_counter(&self, countertype: &CounterType) -> CoreResult<()>;
230
231    /// Get counter overflow status
232    fn is_overflowed(&self, countertype: &CounterType) -> CoreResult<bool>;
233}
234
235/// Linux `perf_event` implementation.
236///
237/// When the `profiling_perf` feature is enabled this opens real kernel
238/// performance counters via the `perf-event` crate (the `perf_event_open(2)`
239/// syscall) and reports their genuine values. Without that feature the crate
240/// has no mechanism to talk to the kernel, so every operation returns an
241/// honest [`HardwareCounterError::NotAvailable`] instead of fabricating data.
242#[cfg(target_os = "linux")]
243pub struct LinuxPerfCounter {
244    #[cfg(feature = "profiling_perf")]
245    active_counters: Mutex<HashMap<CounterType, perf_event::Counter>>,
246}
247
248#[cfg(target_os = "linux")]
249impl LinuxPerfCounter {
250    /// Create a new Linux perf counter
251    pub fn new() -> Self {
252        Self {
253            #[cfg(feature = "profiling_perf")]
254            active_counters: Mutex::new(HashMap::new()),
255        }
256    }
257
258    /// Map a [`CounterType`] to the corresponding `perf-event` hardware event.
259    ///
260    /// Returns `None` for counters that are not exposed as generic
261    /// `PERF_TYPE_HARDWARE` events (e.g. detailed L1/L2/L3 cache or TLB events,
262    /// which would require `PERF_TYPE_HW_CACHE` configuration).
263    #[cfg(feature = "profiling_perf")]
264    fn counter_to_hardware(countertype: &CounterType) -> Option<perf_event::events::Hardware> {
265        use perf_event::events::Hardware;
266        match countertype {
267            CounterType::CpuCycles => Some(Hardware::CPU_CYCLES),
268            CounterType::Instructions => Some(Hardware::INSTRUCTIONS),
269            CounterType::CacheReferences => Some(Hardware::CACHE_REFERENCES),
270            CounterType::CacheMisses => Some(Hardware::CACHE_MISSES),
271            CounterType::BranchInstructions => Some(Hardware::BRANCH_INSTRUCTIONS),
272            CounterType::BranchMisses => Some(Hardware::BRANCH_MISSES),
273            CounterType::BusCycles => Some(Hardware::BUS_CYCLES),
274            CounterType::StalledCyclesFrontend => Some(Hardware::STALLED_CYCLES_FRONTEND),
275            CounterType::StalledCyclesBackend => Some(Hardware::STALLED_CYCLES_BACKEND),
276            _ => None,
277        }
278    }
279}
280
281#[cfg(target_os = "linux")]
282impl Default for LinuxPerfCounter {
283    fn default() -> Self {
284        Self::new()
285    }
286}
287
288#[cfg(all(target_os = "linux", feature = "profiling_perf"))]
289impl PerformanceCounter for LinuxPerfCounter {
290    fn available_counters(&self) -> Vec<CounterType> {
291        vec![
292            CounterType::CpuCycles,
293            CounterType::Instructions,
294            CounterType::CacheReferences,
295            CounterType::CacheMisses,
296            CounterType::BranchInstructions,
297            CounterType::BranchMisses,
298            CounterType::BusCycles,
299            CounterType::StalledCyclesFrontend,
300            CounterType::StalledCyclesBackend,
301        ]
302    }
303
304    fn is_available(&self, countertype: &CounterType) -> bool {
305        Self::counter_to_hardware(countertype).is_some()
306    }
307
308    fn start_counter(&self, countertype: &CounterType) -> CoreResult<()> {
309        let hardware = Self::counter_to_hardware(countertype)
310            .ok_or_else(|| HardwareCounterError::CounterNotFound(format!("{countertype:?}")))?;
311
312        // Open a real kernel counter via perf_event_open(2) and enable it.
313        let mut counter = perf_event::Builder::new()
314            .one_cpu(0)
315            .kind(hardware)
316            .build()
317            .map_err(|e| {
318                HardwareCounterError::SystemError(format!(
319                    "perf_event_open failed for {countertype:?}: {e}"
320                ))
321            })?;
322        counter.enable().map_err(|e| {
323            HardwareCounterError::SystemError(format!(
324                "Failed to enable counter {countertype:?}: {e}"
325            ))
326        })?;
327
328        let mut counters = self
329            .active_counters
330            .lock()
331            .map_err(|_| HardwareCounterError::SystemError("Counter map poisoned".to_string()))?;
332        counters.insert(countertype.clone(), counter);
333        Ok(())
334    }
335
336    fn stop_counter(&self, countertype: &CounterType) -> CoreResult<()> {
337        let mut counters = self
338            .active_counters
339            .lock()
340            .map_err(|_| HardwareCounterError::SystemError("Counter map poisoned".to_string()))?;
341        match counters.remove(countertype) {
342            Some(mut counter) => {
343                // Best-effort disable; the fd is closed when `counter` drops.
344                let _ = counter.disable();
345                Ok(())
346            }
347            None => Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into()),
348        }
349    }
350
351    fn read_counter(&self, countertype: &CounterType) -> CoreResult<CounterValue> {
352        let mut counters = self
353            .active_counters
354            .lock()
355            .map_err(|_| HardwareCounterError::SystemError("Counter map poisoned".to_string()))?;
356        match counters.get_mut(countertype) {
357            Some(counter) => {
358                let value = counter.read().map_err(|e| {
359                    HardwareCounterError::SystemError(format!(
360                        "Failed to read counter {countertype:?}: {e}"
361                    ))
362                })?;
363                Ok(CounterValue::new(countertype.clone(), value))
364            }
365            None => Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into()),
366        }
367    }
368
369    fn read_counters(&self, countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>> {
370        let mut results = Vec::new();
371        for countertype in countertypes {
372            results.push(self.read_counter(countertype)?);
373        }
374        Ok(results)
375    }
376
377    fn reset_counter(&self, countertype: &CounterType) -> CoreResult<()> {
378        let mut counters = self
379            .active_counters
380            .lock()
381            .map_err(|_| HardwareCounterError::SystemError("Counter map poisoned".to_string()))?;
382        match counters.get_mut(countertype) {
383            Some(counter) => counter.reset().map_err(|e| {
384                HardwareCounterError::SystemError(format!(
385                    "Failed to reset counter {countertype:?}: {e}"
386                ))
387                .into()
388            }),
389            None => Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into()),
390        }
391    }
392
393    fn is_overflowed(&self, countertype: &CounterType) -> CoreResult<bool> {
394        // perf hardware counters are 64-bit and the kernel scales/accumulates
395        // them transparently on read; the `perf-event` crate exposes no overflow
396        // flag, so for an active counter we report "not overflowed".
397        let counters = self
398            .active_counters
399            .lock()
400            .map_err(|_| HardwareCounterError::SystemError("Counter map poisoned".to_string()))?;
401        if counters.contains_key(countertype) {
402            Ok(false)
403        } else {
404            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
405        }
406    }
407}
408
409/// Honest fallback for Linux builds without the `profiling_perf` feature.
410///
411/// The crate cannot read hardware counters without the `perf-event`
412/// integration, so rather than returning fabricated numbers every operation
413/// reports that counters are unavailable. Enable the `profiling_perf` feature
414/// for real `perf_event_open(2)`-backed measurements.
415#[cfg(all(target_os = "linux", not(feature = "profiling_perf")))]
416impl PerformanceCounter for LinuxPerfCounter {
417    fn available_counters(&self) -> Vec<CounterType> {
418        Vec::new()
419    }
420
421    fn is_available(&self, _countertype: &CounterType) -> bool {
422        false
423    }
424
425    fn start_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
426        Err(HardwareCounterError::NotAvailable.into())
427    }
428
429    fn stop_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
430        Err(HardwareCounterError::NotAvailable.into())
431    }
432
433    fn read_counter(&self, _countertype: &CounterType) -> CoreResult<CounterValue> {
434        Err(HardwareCounterError::NotAvailable.into())
435    }
436
437    fn read_counters(&self, _countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>> {
438        Err(HardwareCounterError::NotAvailable.into())
439    }
440
441    fn reset_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
442        Err(HardwareCounterError::NotAvailable.into())
443    }
444
445    fn is_overflowed(&self, _countertype: &CounterType) -> CoreResult<bool> {
446        Err(HardwareCounterError::NotAvailable.into())
447    }
448}
449
450/// Windows Performance Data Helper (PDH) implementation
451#[cfg(target_os = "windows")]
452pub struct WindowsPdhCounter {
453    active_counters: RwLock<HashMap<CounterType, String>>, // PDH counter paths
454}
455
456#[cfg(target_os = "windows")]
457impl WindowsPdhCounter {
458    /// Create a new Windows PDH counter
459    pub fn new() -> Self {
460        Self {
461            active_counters: RwLock::new(HashMap::new()),
462        }
463    }
464
465    /// Convert counter type to PDH counter path
466    fn counter_to_path(countertype: &CounterType) -> Option<String> {
467        match countertype {
468            CounterType::CpuCycles => Some("\\Processor(_Total)\\% Processor Time".to_string()),
469            CounterType::CpuFrequency => {
470                Some("\\Processor Information(_Total)\\Processor Frequency".to_string())
471            }
472            CounterType::CpuPower => Some("\\Power Meter(*)\\Power".to_string()),
473            _ => None,
474        }
475    }
476}
477
478#[cfg(target_os = "windows")]
479impl Default for WindowsPdhCounter {
480    fn default() -> Self {
481        Self::new()
482    }
483}
484
485#[cfg(target_os = "windows")]
486impl PerformanceCounter for WindowsPdhCounter {
487    fn available_counters(&self) -> Vec<CounterType> {
488        vec![
489            CounterType::CpuCycles,
490            CounterType::CpuFrequency,
491            CounterType::CpuPower,
492        ]
493    }
494
495    fn is_available(&self, countertype: &CounterType) -> bool {
496        Self::counter_to_path(countertype).is_some()
497    }
498
499    fn start_counter(&self, countertype: &CounterType) -> CoreResult<()> {
500        if let Some(path) = Self::counter_to_path(countertype) {
501            // In real implementation: PDH API calls
502            let mut counters = self.active_counters.write().expect("Operation failed");
503            counters.insert(countertype.clone(), path);
504            Ok(())
505        } else {
506            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
507        }
508    }
509
510    fn stop_counter(&self, countertype: &CounterType) -> CoreResult<()> {
511        let mut counters = self.active_counters.write().expect("Operation failed");
512        if counters.remove(countertype).is_some() {
513            Ok(())
514        } else {
515            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
516        }
517    }
518
519    fn read_counter(&self, countertype: &CounterType) -> CoreResult<CounterValue> {
520        let counters = self.active_counters.read().expect("Operation failed");
521        if counters.contains_key(countertype) {
522            // Reading a live value requires the Windows PDH API (PdhCollectQueryData
523            // / PdhGetFormattedCounterValue), which is not yet wired up. Returning a
524            // fabricated number here would be misleading, so report honestly that
525            // the value cannot be read rather than inventing one.
526            Err(HardwareCounterError::SystemError(format!(
527                "Reading Windows PDH counter {countertype:?} is not implemented"
528            ))
529            .into())
530        } else {
531            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
532        }
533    }
534
535    fn read_counters(&self, countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>> {
536        let mut results = Vec::new();
537        for countertype in countertypes {
538            results.push(self.read_counter(countertype)?);
539        }
540        Ok(results)
541    }
542
543    fn reset_counter(&self, countertype: &CounterType) -> CoreResult<()> {
544        // PDH counters can't be reset
545        Err(
546            HardwareCounterError::InvalidConfiguration("PDH counters cannot be reset".to_string())
547                .into(),
548        )
549    }
550
551    fn is_overflowed(&self, _countertype: &CounterType) -> CoreResult<bool> {
552        // PDH counters don't typically overflow in our implementation
553        Ok(false)
554    }
555}
556
557/// macOS performance counter implementation using system profiling
558#[cfg(target_os = "macos")]
559pub struct MacOSCounter {
560    active_counters: RwLock<HashMap<CounterType, bool>>,
561}
562
563#[cfg(target_os = "macos")]
564impl MacOSCounter {
565    /// Create a new macOS counter
566    pub fn new() -> Self {
567        Self {
568            active_counters: RwLock::new(HashMap::new()),
569        }
570    }
571}
572
573#[cfg(target_os = "macos")]
574impl Default for MacOSCounter {
575    fn default() -> Self {
576        Self::new()
577    }
578}
579
580#[cfg(target_os = "macos")]
581impl PerformanceCounter for MacOSCounter {
582    fn available_counters(&self) -> Vec<CounterType> {
583        vec![
584            CounterType::CpuCycles,
585            CounterType::Instructions,
586            CounterType::CpuFrequency,
587            CounterType::CpuTemperature,
588        ]
589    }
590
591    fn is_available(&self, countertype: &CounterType) -> bool {
592        matches!(
593            countertype,
594            CounterType::CpuCycles
595                | CounterType::Instructions
596                | CounterType::CpuFrequency
597                | CounterType::CpuTemperature
598        )
599    }
600
601    fn start_counter(&self, countertype: &CounterType) -> CoreResult<()> {
602        if self.is_available(countertype) {
603            let mut counters = self.active_counters.write().expect("Operation failed");
604            counters.insert(countertype.clone(), true);
605            Ok(())
606        } else {
607            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
608        }
609    }
610
611    fn stop_counter(&self, countertype: &CounterType) -> CoreResult<()> {
612        let mut counters = self.active_counters.write().expect("Operation failed");
613        if counters.remove(countertype).is_some() {
614            Ok(())
615        } else {
616            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
617        }
618    }
619
620    fn read_counter(&self, countertype: &CounterType) -> CoreResult<CounterValue> {
621        let counters = self.active_counters.read().expect("Operation failed");
622        if counters.contains_key(countertype) {
623            // macOS exposes CPU performance counters through the private kperf /
624            // IOKit interfaces (or `powermetrics` for power/thermal), none of which
625            // are wired up here. Rather than returning a plausible-looking but
626            // fabricated value, report honestly that the read is unavailable.
627            Err(HardwareCounterError::SystemError(format!(
628                "Reading macOS hardware counter {countertype:?} is not implemented"
629            ))
630            .into())
631        } else {
632            Err(HardwareCounterError::CounterNotFound(format!("{countertype:?}")).into())
633        }
634    }
635
636    fn read_counters(&self, countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>> {
637        let mut results = Vec::new();
638        for countertype in countertypes {
639            results.push(self.read_counter(countertype)?);
640        }
641        Ok(results)
642    }
643
644    fn reset_counter(&self, countertype: &CounterType) -> CoreResult<()> {
645        // macOS counters typically can't be reset
646        Ok(())
647    }
648
649    fn is_overflowed(&self, _countertype: &CounterType) -> CoreResult<bool> {
650        // macOS hardware counters don't typically overflow in our implementation
651        Ok(false)
652    }
653}
654
655/// Hardware counter manager that provides a unified interface
656pub struct HardwareCounterManager {
657    backend: Box<dyn PerformanceCounter>,
658    session_counters: RwLock<HashMap<String, Vec<CounterType>>>,
659    counter_history: RwLock<HashMap<CounterType, Vec<CounterValue>>>,
660    max_history_size: usize,
661}
662
663impl HardwareCounterManager {
664    /// Create a new hardware counter manager with platform-specific backend
665    pub fn new() -> CoreResult<Self> {
666        let backend = Self::create_platform_backend()?;
667
668        Ok(Self {
669            backend,
670            session_counters: RwLock::new(HashMap::new()),
671            counter_history: RwLock::new(HashMap::new()),
672            max_history_size: 1000,
673        })
674    }
675
676    /// Create platform-specific backend
677    fn create_platform_backend() -> CoreResult<Box<dyn PerformanceCounter>> {
678        #[cfg(target_os = "linux")]
679        {
680            Ok(Box::new(LinuxPerfCounter::new()))
681        }
682
683        #[cfg(target_os = "windows")]
684        {
685            Ok(Box::new(WindowsPdhCounter::new()))
686        }
687
688        #[cfg(target_os = "macos")]
689        {
690            Ok(Box::new(MacOSCounter::new()))
691        }
692
693        #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
694        {
695            Err(HardwareCounterError::NotAvailable.into())
696        }
697    }
698
699    /// Get available counter types
700    pub fn available_counters(&self) -> Vec<CounterType> {
701        self.backend.available_counters()
702    }
703
704    /// Start a profiling session with specific counters
705    pub fn start_session(&self, sessionname: &str, counters: Vec<CounterType>) -> CoreResult<()> {
706        // Start all requested counters
707        for counter in &counters {
708            self.backend.start_counter(counter)?;
709        }
710
711        // Register session
712        let mut sessions = self.session_counters.write().expect("Operation failed");
713        sessions.insert(sessionname.to_string(), counters);
714
715        Ok(())
716    }
717
718    /// Stop a profiling session
719    pub fn stop_session(&self, sessionname: &str) -> CoreResult<()> {
720        let mut sessions = self.session_counters.write().expect("Operation failed");
721
722        if let Some(counters) = sessions.remove(sessionname) {
723            for counter in &counters {
724                self.backend.stop_counter(counter)?;
725            }
726            Ok(())
727        } else {
728            Err(HardwareCounterError::InvalidConfiguration(format!(
729                "Session not found: {sessionname}"
730            ))
731            .into())
732        }
733    }
734
735    /// Sample all active counters
736    pub fn sample_counters(&self) -> CoreResult<HashMap<CounterType, CounterValue>> {
737        let sessions = self.session_counters.read().expect("Operation failed");
738        let active_counters: Vec<CounterType> = sessions
739            .values()
740            .flat_map(|counters| counters.iter())
741            .cloned()
742            .collect::<std::collections::HashSet<_>>()
743            .into_iter()
744            .collect();
745
746        let values = self.backend.read_counters(&active_counters)?;
747
748        // Store in history
749        let mut history = self.counter_history.write().expect("Operation failed");
750        for value in &values {
751            let counter_history = history.entry(value.countertype.clone()).or_default();
752
753            counter_history.push(value.clone());
754
755            // Limit history size
756            if counter_history.len() > self.max_history_size {
757                counter_history.drain(0..counter_history.len() - self.max_history_size);
758            }
759        }
760
761        // Convert to HashMap
762        let result = values
763            .into_iter()
764            .map(|value| (value.countertype.clone(), value))
765            .collect();
766
767        Ok(result)
768    }
769
770    /// Get counter history
771    pub fn get_counter_history(&self, countertype: &CounterType) -> Vec<CounterValue> {
772        let history = self.counter_history.read().expect("Operation failed");
773        history.get(countertype).cloned().unwrap_or_default()
774    }
775
776    /// Calculate derived metrics
777    pub fn calculate_derived_metrics(
778        &self,
779        counters: &HashMap<CounterType, CounterValue>,
780    ) -> DerivedMetrics {
781        let mut metrics = DerivedMetrics::default();
782
783        // Instructions per cycle (IPC)
784        if let (Some(instructions), Some(cycles)) = (
785            counters.get(&CounterType::Instructions),
786            counters.get(&CounterType::CpuCycles),
787        ) {
788            if cycles.value > 0 {
789                metrics.instructions_per_cycle = instructions.value as f64 / cycles.value as f64;
790            }
791        }
792
793        // Cache hit rate
794        if let (Some(references), Some(misses)) = (
795            counters.get(&CounterType::CacheReferences),
796            counters.get(&CounterType::CacheMisses),
797        ) {
798            if references.value > 0 {
799                metrics.cache_hit_rate = 1.0 - (misses.value as f64 / references.value as f64);
800            }
801        }
802
803        // Branch prediction accuracy
804        if let (Some(instructions), Some(misses)) = (
805            counters.get(&CounterType::BranchInstructions),
806            counters.get(&CounterType::BranchMisses),
807        ) {
808            if instructions.value > 0 {
809                metrics.branch_prediction_accuracy =
810                    1.0 - (misses.value as f64 / instructions.value as f64);
811            }
812        }
813
814        // CPU utilization (cycles per second)
815        if let Some(cycles) = counters.get(&CounterType::CpuCycles) {
816            // Would need time delta for accurate calculation
817            metrics.cpu_utilization = cycles.value as f64 / 1_000_000.0; // Simplified
818        }
819
820        metrics
821    }
822
823    /// Generate performance report
824    pub fn generate_report(&self, sessionname: &str) -> PerformanceReport {
825        let sessions = self.session_counters.read().expect("Operation failed");
826        let counters = sessions.get(sessionname).cloned().unwrap_or_default();
827
828        let current_values = self.sample_counters().unwrap_or_default();
829        let derived_metrics = self.calculate_derived_metrics(&current_values);
830
831        PerformanceReport {
832            session_name: sessionname.to_string(),
833            timestamp: Instant::now(),
834            counter_values: current_values,
835            derived_metrics,
836            countersmonitored: counters,
837        }
838    }
839}
840
841impl Default for HardwareCounterManager {
842    fn default() -> Self {
843        Self::new().unwrap_or_else(|_| {
844            // Fallback with no-op backend
845            Self {
846                backend: Box::new(NoOpCounter),
847                session_counters: RwLock::new(HashMap::new()),
848                counter_history: RwLock::new(HashMap::new()),
849                max_history_size: 1000,
850            }
851        })
852    }
853}
854
855/// No-op counter for unsupported platforms
856pub struct NoOpCounter;
857
858impl PerformanceCounter for NoOpCounter {
859    fn available_counters(&self) -> Vec<CounterType> {
860        Vec::new()
861    }
862
863    fn is_available(&self, _countertype: &CounterType) -> bool {
864        false
865    }
866
867    fn start_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
868        Err(HardwareCounterError::NotAvailable.into())
869    }
870
871    fn stop_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
872        Err(HardwareCounterError::NotAvailable.into())
873    }
874
875    fn read_counter(&self, _countertype: &CounterType) -> CoreResult<CounterValue> {
876        Err(HardwareCounterError::NotAvailable.into())
877    }
878
879    fn read_counters(&self, _countertypes: &[CounterType]) -> CoreResult<Vec<CounterValue>> {
880        Err(HardwareCounterError::NotAvailable.into())
881    }
882
883    fn reset_counter(&self, _countertype: &CounterType) -> CoreResult<()> {
884        Err(HardwareCounterError::NotAvailable.into())
885    }
886
887    fn is_overflowed(&self, _countertype: &CounterType) -> CoreResult<bool> {
888        Err(HardwareCounterError::NotAvailable.into())
889    }
890}
891
892/// Derived performance metrics calculated from raw counters
893#[derive(Debug, Clone, Default)]
894pub struct DerivedMetrics {
895    /// Instructions per cycle
896    pub instructions_per_cycle: f64,
897    /// Cache hit rate (0.0 to 1.0)
898    pub cache_hit_rate: f64,
899    /// Branch prediction accuracy (0.0 to 1.0)
900    pub branch_prediction_accuracy: f64,
901    /// CPU utilization percentage
902    pub cpu_utilization: f64,
903    /// Memory bandwidth (bytes per second)
904    pub memorybandwidth: f64,
905    /// Power efficiency (instructions per watt)
906    pub power_efficiency: f64,
907}
908
909/// Performance report containing counter values and analysis
910#[derive(Debug, Clone)]
911pub struct PerformanceReport {
912    /// Session name
913    pub session_name: String,
914    /// Report timestamp
915    pub timestamp: Instant,
916    /// Raw counter values
917    pub counter_values: HashMap<CounterType, CounterValue>,
918    /// Derived metrics
919    pub derived_metrics: DerivedMetrics,
920    /// Counters that were monitored
921    pub countersmonitored: Vec<CounterType>,
922}
923
924impl PerformanceReport {
925    /// Format the report as human-readable text
926    pub fn formattext(&self) -> String {
927        let mut output = String::new();
928
929        output.push_str(&format!(
930            "Performance Report: {session_name}\n",
931            session_name = self.session_name
932        ));
933        output.push_str(&format!("Timestamp: {:?}\n\n", self.timestamp));
934
935        output.push_str("Raw Counters:\n");
936        for (countertype, value) in &self.counter_values {
937            output.push_str(&format!(
938                "  {}: {} {}\n",
939                countertype.description(),
940                value.scaled_value(),
941                countertype.unit()
942            ));
943        }
944
945        output.push_str("\nDerived Metrics:\n");
946        let metrics = &self.derived_metrics;
947        output.push_str(&format!(
948            "  Instructions per Cycle: {:.2}\n",
949            metrics.instructions_per_cycle
950        ));
951        output.push_str(&format!(
952            "  Cache Hit Rate: {:.2}%\n",
953            metrics.cache_hit_rate * 100.0
954        ));
955        output.push_str(&format!(
956            "  Branch Prediction Accuracy: {:.2}%\n",
957            metrics.branch_prediction_accuracy * 100.0
958        ));
959        output.push_str(&format!(
960            "  CPU Utilization: {:.2}%\n",
961            metrics.cpu_utilization
962        ));
963
964        output
965    }
966
967    /// Export report as JSON
968    pub fn to_json(&self) -> String {
969        // Simplified JSON serialization - real implementation would use serde
970        format!(
971            r#"{{"session":"{}","timestamp":"{}","metrics":{{"ipc":{:.2},"cache_hit_rate":{:.2},"branch_accuracy":{:.2}}}}}"#,
972            self.session_name,
973            self.timestamp.elapsed().as_secs(),
974            self.derived_metrics.instructions_per_cycle,
975            self.derived_metrics.cache_hit_rate,
976            self.derived_metrics.branch_prediction_accuracy
977        )
978    }
979}
980
981/// Global hardware counter manager instance
982static GLOBAL_MANAGER: std::sync::OnceLock<Arc<Mutex<HardwareCounterManager>>> =
983    std::sync::OnceLock::new();
984
985/// Get the global hardware counter manager
986#[allow(dead_code)]
987pub fn global_manager() -> Arc<Mutex<HardwareCounterManager>> {
988    GLOBAL_MANAGER
989        .get_or_init(|| Arc::new(Mutex::new(HardwareCounterManager::default())))
990        .clone()
991}
992
993/// Convenience functions for hardware performance monitoring
994pub mod utils {
995    use super::*;
996
997    /// Start monitoring basic CPU performance counters
998    pub fn start_basic_cpumonitoring(sessionname: &str) -> CoreResult<()> {
999        let manager = global_manager();
1000        let manager = manager.lock().expect("Operation failed");
1001
1002        let counters = vec![
1003            CounterType::CpuCycles,
1004            CounterType::Instructions,
1005            CounterType::CacheReferences,
1006            CounterType::CacheMisses,
1007        ];
1008
1009        manager.start_session(sessionname, counters)
1010    }
1011
1012    /// Start monitoring cache performance
1013    pub fn start_cachemonitoring(sessionname: &str) -> CoreResult<()> {
1014        let manager = global_manager();
1015        let manager = manager.lock().expect("Operation failed");
1016
1017        let counters = vec![
1018            CounterType::L1DCacheLoads,
1019            CounterType::L1DCacheLoadMisses,
1020            CounterType::L2CacheLoads,
1021            CounterType::L2CacheLoadMisses,
1022            CounterType::L3CacheLoads,
1023            CounterType::L3CacheLoadMisses,
1024        ];
1025
1026        manager.start_session(sessionname, counters)
1027    }
1028
1029    /// Get a quick performance snapshot
1030    pub fn get_performance_snapshot() -> CoreResult<HashMap<CounterType, CounterValue>> {
1031        let manager = global_manager();
1032        let manager = manager.lock().expect("Operation failed");
1033        manager.sample_counters()
1034    }
1035
1036    /// Check if hardware performance counters are available
1037    pub fn counters_available() -> bool {
1038        let manager = global_manager();
1039        let manager = manager.lock().expect("Operation failed");
1040        !manager.available_counters().is_empty()
1041    }
1042}
1043
1044#[cfg(test)]
1045mod tests {
1046    use super::*;
1047
1048    #[test]
1049    fn test_countertype_properties() {
1050        let counter = CounterType::CpuCycles;
1051        assert_eq!(counter.description(), "CPU cycles");
1052        assert_eq!(counter.unit(), "count");
1053
1054        let custom = CounterType::Custom("test".to_string());
1055        assert_eq!(custom.description(), "Custom counter");
1056        assert_eq!(custom.unit(), "unknown");
1057    }
1058
1059    #[test]
1060    fn test_counter_value() {
1061        let counter = CounterType::Instructions;
1062        let value = CounterValue::new(counter.clone(), 1000);
1063
1064        assert_eq!(value.countertype, counter);
1065        assert_eq!(value.value, 1000);
1066        assert_eq!(value.scaled_value(), 1000.0);
1067        assert!(value.enabled);
1068    }
1069
1070    #[test]
1071    fn test_derived_metrics() {
1072        let metrics = DerivedMetrics {
1073            instructions_per_cycle: 2.5,
1074            cache_hit_rate: 0.95,
1075            branch_prediction_accuracy: 0.98,
1076            ..Default::default()
1077        };
1078
1079        assert_eq!(metrics.instructions_per_cycle, 2.5);
1080        assert_eq!(metrics.cache_hit_rate, 0.95);
1081        assert_eq!(metrics.branch_prediction_accuracy, 0.98);
1082    }
1083
1084    #[test]
1085    fn test_performance_report() {
1086        let mut counter_values = HashMap::new();
1087        counter_values.insert(
1088            CounterType::CpuCycles,
1089            CounterValue::new(CounterType::CpuCycles, 1000000),
1090        );
1091
1092        let report = PerformanceReport {
1093            session_name: "test_session".to_string(),
1094            timestamp: Instant::now(),
1095            counter_values,
1096            derived_metrics: DerivedMetrics::default(),
1097            countersmonitored: vec![CounterType::CpuCycles],
1098        };
1099
1100        let text = report.formattext();
1101        assert!(text.contains("Performance Report: test_session"));
1102        assert!(text.contains("CPU cycles"));
1103
1104        let json = report.to_json();
1105        assert!(json.contains("test_session"));
1106    }
1107
1108    #[test]
1109    fn test_no_op_counter() {
1110        let counter = NoOpCounter;
1111        assert!(counter.available_counters().is_empty());
1112        assert!(!counter.is_available(&CounterType::CpuCycles));
1113        assert!(counter.start_counter(&CounterType::CpuCycles).is_err());
1114    }
1115
1116    #[test]
1117    fn test_global_manager() {
1118        let manager = global_manager();
1119
1120        // Should return the same instance
1121        let manager2 = global_manager();
1122        assert!(Arc::ptr_eq(&manager, &manager2));
1123    }
1124
1125    #[test]
1126    fn test_utils_functions() {
1127        // Test that utility functions don't panic
1128        let available = utils::counters_available();
1129        // Function should complete without panicking - no assertion needed
1130
1131        // Test starting monitoring (may fail on unsupported platforms)
1132        let result = utils::start_basic_cpumonitoring("test");
1133        // Either succeeds or fails with known error
1134        assert!(result.is_ok() || result.is_err());
1135    }
1136}