use std::sync::atomic::{compiler_fence, Ordering};
use super::error::{MeasurementError, MeasurementResult};
/// Reasons a PMU-backed timer could not be created.
///
/// The [`Display`](std::fmt::Display) output of each variant is a
/// user-facing explanation; some variants include multi-line remediation
/// hints.
#[derive(Debug, Clone)]
pub enum PmuError {
    /// PMU timing is not available on this platform (it requires Apple Silicon).
    UnsupportedPlatform,
    /// The kperf framework was not found.
    FrameworkNotFound,
    /// kperf refused access because the process lacks root privileges.
    PermissionDenied,
    /// PMU configuration failed; the payload describes the failure.
    ConfigurationFailed(String),
    /// Another process currently holds exclusive PMU access.
    ConcurrentAccess,
}

impl std::fmt::Display for PmuError {
    /// Writes the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only `ConfigurationFailed` needs interpolation; every other variant
        // maps to a fixed text that is emitted verbatim at the end.
        let text: &str = match self {
            Self::ConfigurationFailed(msg) => {
                return write!(f, "PMU configuration failed: {}", msg);
            }
            Self::UnsupportedPlatform => "PMU timing requires Apple Silicon",
            Self::FrameworkNotFound => "kperf framework not found",
            Self::PermissionDenied => concat!(
                "kperf requires root privileges.\n",
                "\n",
                "To use cycle-accurate PMU timing:\n",
                "\n",
                "1. Build first: cargo build --release\n",
                "2. Run with sudo: sudo ./target/release/your_binary\n",
                "\n",
                "Alternatively, the library will fall back to the standard timer with\n",
                "adaptive batching, which works for most cryptographic operations.",
            ),
            Self::ConcurrentAccess => concat!(
                "Another process holds exclusive PMU access.\n",
                "\n",
                "The macOS kpc API requires system-wide exclusive access to PMU counters.\n",
                "When running tests in parallel (e.g., via nextest), only one process can\n",
                "use kperf at a time.\n",
                "\n",
                "Solutions:\n",
                "1. Run with single thread: cargo nextest run --test-threads=1\n",
                "2. Use the kperf profile: cargo nextest run --profile kperf\n",
                "\n",
                "The library will automatically fall back to the standard timer.",
            ),
        };
        f.write_str(text)
    }
}

impl std::error::Error for PmuError {}
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
/// Cycle-counting timer backed by a `kperf_rs::PerfCounter`.
///
/// Only compiled for macOS on aarch64; a stub with the same name exists for
/// every other target.
pub struct PmuTimer {
/// Counter handle that is read before and after each measured closure.
counter: kperf_rs::PerfCounter,
/// Ratio of counted cycles to wall-clock nanoseconds, estimated when the
/// timer is constructed.
cycles_per_ns: f64,
/// Guard obtained from the kperf lock, stored so it lives exactly as long as
/// the timer. NOTE(review): presumably the lock is released when the guard
/// is dropped; confirm against `kperf_lock`. Declared last, so it is dropped
/// after `counter`.
_lock_guard: super::kperf_lock::LockGuard,
}
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
impl PmuTimer {
    /// Creates a timer that counts CPU cycles through kperf.
    ///
    /// Steps, in order: acquire the kperf lock, check kpc permission, build a
    /// counter tracking the `Cycles` event, start it, and calibrate the
    /// cycles-per-nanosecond ratio.
    ///
    /// # Errors
    ///
    /// * [`PmuError::ConcurrentAccess`] if acquiring the kperf lock timed out.
    /// * [`PmuError::PermissionDenied`] if the kpc permission check reports
    ///   that permission is denied.
    /// * [`PmuError::ConfigurationFailed`] for a lock I/O error, any other
    ///   permission-check error, or a failure to build or start the counter.
    pub fn new() -> Result<Self, PmuError> {
        use super::kperf_lock::{try_acquire_default, LockResult};

        // Take the lock first so that the permission check and counter setup
        // below only run while this process holds it.
        let lock_guard = match try_acquire_default() {
            LockResult::Acquired(guard) => guard,
            LockResult::Timeout => {
                return Err(PmuError::ConcurrentAccess);
            }
            LockResult::IoError(e) => {
                return Err(PmuError::ConfigurationFailed(format!(
                    "Failed to acquire kperf lock: {}",
                    e
                )));
            }
        };

        kperf_rs::check_kpc_permission().map_err(|e| match e {
            kperf_rs::error::KperfError::PermissionDenied => PmuError::PermissionDenied,
            _ => PmuError::ConfigurationFailed(format!("{:?}", e)),
        })?;

        let mut counter = kperf_rs::PerfCounterBuilder::new()
            .track_event(kperf_rs::event::Event::Cycles)
            .build_counter()
            .map_err(|e| PmuError::ConfigurationFailed(format!("{:?}", e)))?;
        counter
            .start()
            .map_err(|e| PmuError::ConfigurationFailed(format!("Failed to start: {:?}", e)))?;

        let cycles_per_ns = Self::calibrate(&mut counter);

        Ok(Self {
            counter,
            cycles_per_ns,
            _lock_guard: lock_guard,
        })
    }

    /// Estimates how many counted cycles elapse per wall-clock nanosecond.
    ///
    /// Takes up to ten samples, each spanning a busy loop of at least one
    /// millisecond, and returns the (upper) median of the per-sample
    /// `cycles / nanoseconds` ratios. Returns `3.0` when the initial counter
    /// read fails or when no usable sample was collected.
    ///
    /// Every successful counter read is paired with an `Instant` taken right
    /// after it, and both deltas of a sample are computed from the same pair.
    /// A sample whose read fails is skipped without advancing either baseline,
    /// so the next successful sample spans the same (longer) window on both
    /// the cycle axis and the time axis.
    fn calibrate(counter: &mut kperf_rs::PerfCounter) -> f64 {
        use std::time::{Duration, Instant};

        const SAMPLE_COUNT: usize = 10;
        const MIN_SAMPLE_TIME: Duration = Duration::from_millis(1);
        // NOTE(review): presumably approximates a ~3 GHz core clock; confirm.
        const FALLBACK_CYCLES_PER_NS: f64 = 3.0;

        let mut prev_cycles = match counter.read() {
            Ok(c) => c,
            Err(_) => return FALLBACK_CYCLES_PER_NS,
        };
        let mut prev_instant = Instant::now();
        let mut ratios = Vec::with_capacity(SAMPLE_COUNT);

        for _ in 0..SAMPLE_COUNT {
            // Busy-wait so the core keeps executing instructions for the whole
            // sample. The clock is only consulted when the low 16 bits of the
            // LCG state are zero, i.e. once every 65536 iterations.
            let spin_start = Instant::now();
            let mut dummy: u64 = 1;
            loop {
                dummy = dummy.wrapping_mul(6364136223846793005).wrapping_add(1);
                std::hint::black_box(dummy);
                if (dummy & 0xFFFF) == 0 && spin_start.elapsed() >= MIN_SAMPLE_TIME {
                    break;
                }
            }

            let current_cycles = match counter.read() {
                Ok(c) => c,
                // Keep the previous (cycles, instant) pair untouched so the
                // next sample still measures matching windows.
                Err(_) => continue,
            };
            let now = Instant::now();

            let elapsed_nanos = now.duration_since(prev_instant).as_nanos();
            let delta_cycles = current_cycles.saturating_sub(prev_cycles);
            prev_cycles = current_cycles;
            prev_instant = now;

            if elapsed_nanos > 0 && delta_cycles > 0 {
                ratios.push(delta_cycles as f64 / elapsed_nanos as f64);
            }
        }

        if ratios.is_empty() {
            return FALLBACK_CYCLES_PER_NS;
        }
        // The median discards samples distorted by scheduling noise.
        ratios.sort_by(|a, b| a.total_cmp(b));
        ratios[ratios.len() / 2]
    }

    /// Runs `f` once and returns the number of cycles counted while it ran.
    ///
    /// The counter is read immediately before and after the call; the result
    /// is `end - start`, saturating at zero. Compiler fences on both sides of
    /// the call restrict the compiler from reordering memory operations
    /// across the measurement boundaries, and the closure's return value is
    /// passed through `black_box` so the call cannot be optimised away.
    ///
    /// # Errors
    ///
    /// Returns [`MeasurementError::SyscallFailed`] if either counter read
    /// fails. When the first read fails, `f` is not called.
    #[inline]
    pub fn measure_cycles<F, T>(&mut self, f: F) -> MeasurementResult
    where
        F: FnOnce() -> T,
    {
        let start = self
            .counter
            .read()
            .map_err(|_| MeasurementError::SyscallFailed)?;
        compiler_fence(Ordering::SeqCst);
        std::hint::black_box(f());
        compiler_fence(Ordering::SeqCst);
        let end = self
            .counter
            .read()
            .map_err(|_| MeasurementError::SyscallFailed)?;
        Ok(end.saturating_sub(start))
    }

    /// Converts a cycle count to nanoseconds using the calibrated ratio.
    #[inline]
    pub fn cycles_to_ns(&self, cycles: u64) -> f64 {
        cycles as f64 / self.cycles_per_ns
    }

    /// Returns the calibrated cycles-per-nanosecond ratio.
    pub fn cycles_per_ns(&self) -> f64 {
        self.cycles_per_ns
    }

    /// Returns the duration of a single cycle in nanoseconds.
    pub fn resolution_ns(&self) -> f64 {
        1.0 / self.cycles_per_ns
    }
}
#[cfg(all(target_os = "macos", target_arch = "aarch64"))]
impl std::fmt::Debug for PmuTimer {
    /// Formats the timer as a `PmuTimer` struct showing only `cycles_per_ns`;
    /// the counter handle and the lock guard are left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("PmuTimer");
        repr.field("cycles_per_ns", &self.cycles_per_ns);
        repr.finish()
    }
}
#[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
/// Stand-in for the PMU timer on every target other than macOS/aarch64.
///
/// Its `new()` always fails with `PmuError::UnsupportedPlatform`; the type
/// exists so that code referring to `PmuTimer` compiles on all platforms.
#[derive(Debug)]
pub struct PmuTimer {
// Zero-sized private field: the struct cannot be built with a struct
// literal from outside this module.
_private: (),
}
#[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
impl PmuTimer {
    /// Always fails: PMU timing is unavailable on this target.
    ///
    /// # Errors
    ///
    /// Returns [`PmuError::UnsupportedPlatform`] unconditionally.
    pub fn new() -> Result<Self, PmuError> {
        Err(PmuError::UnsupportedPlatform)
    }

    /// Never measures anything; the closure is dropped without being called.
    ///
    /// # Errors
    ///
    /// Returns [`MeasurementError::SyscallFailed`] unconditionally.
    #[inline]
    pub fn measure_cycles<F: FnOnce() -> T, T>(&mut self, _f: F) -> MeasurementResult {
        Err(MeasurementError::SyscallFailed)
    }

    /// Returns `cycles` unchanged as an `f64` (the stub ratio is one cycle
    /// per nanosecond).
    #[inline]
    pub fn cycles_to_ns(&self, cycles: u64) -> f64 {
        // Dividing by the stub ratio of 1.0 is exact.
        cycles as f64 / self.cycles_per_ns()
    }

    /// Returns the fixed stub ratio of `1.0`.
    pub fn cycles_per_ns(&self) -> f64 {
        1.0
    }

    /// Returns `1.0`, the reciprocal of the stub ratio.
    pub fn resolution_ns(&self) -> f64 {
        self.cycles_per_ns().recip()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Construction must not panic, and whatever outcome it produces must be
    /// internally consistent.
    ///
    /// The outcome depends on the environment (privileges, other processes
    /// using kperf), so errors other than `PermissionDenied` are tolerated
    /// without further checks.
    #[test]
    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
    fn test_pmu_timer_requires_root() {
        match PmuTimer::new() {
            Ok(timer) => {
                // Calibration yields either a measured positive ratio or the
                // positive fallback value.
                let ratio = timer.cycles_per_ns();
                assert!(
                    ratio.is_finite() && ratio > 0.0,
                    "calibrated cycles_per_ns must be finite and positive, got {}",
                    ratio
                );
                let resolution = timer.resolution_ns();
                assert!(
                    resolution.is_finite() && resolution > 0.0,
                    "resolution_ns must be finite and positive, got {}",
                    resolution
                );
            }
            Err(err @ PmuError::PermissionDenied) => {
                // The help text must tell the user that root is required.
                assert!(err.to_string().contains("root privileges"));
            }
            Err(_) => {
                // e.g. lock contention or a configuration failure.
            }
        }
    }

    /// On unsupported targets the constructor must report
    /// `UnsupportedPlatform`.
    #[test]
    #[cfg(not(all(target_os = "macos", target_arch = "aarch64")))]
    fn test_pmu_unsupported_platform() {
        assert!(matches!(
            PmuTimer::new(),
            Err(PmuError::UnsupportedPlatform)
        ));
    }
}