#[cfg(target_os = "linux")]
use std::sync::atomic::{compiler_fence, Ordering};
#[cfg(all(target_os = "linux", feature = "perf-mmap"))]
use super::perf_mmap::MmapState;
use super::error::{MeasurementError, MeasurementResult};
/// Errors that can occur while setting up perf-based cycle timing.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare errors directly
/// (`assert_eq!`, `==`) instead of having to go through `matches!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PerfError {
    /// Returned by the non-Linux stub constructor: perf timing is Linux-only.
    UnsupportedPlatform,
    /// Opening the counter failed with an OS "permission denied" error.
    PermissionDenied,
    /// Any other failure while opening or enabling the counter; the payload
    /// carries a formatted description of the underlying error.
    ConfigurationFailed(String),
}
impl std::fmt::Display for PerfError {
    /// Renders the user-facing message for each error variant.
    ///
    /// `PermissionDenied` expands to a multi-line help text listing the ways
    /// to obtain access to `perf_event`; the other variants are one-liners.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Assembled piecewise so every output line is visible on its own
        // source line; the resulting text contains no leading indentation.
        const PERMISSION_HELP: &str = concat!(
            "perf_event requires elevated privileges.\n",
            "\n",
            "To use cycle-accurate PMU timing, either:\n",
            "\n",
            "1. Run with sudo:\n",
            "cargo build --release\n",
            "sudo ./target/release/your_binary\n",
            "\n",
            "2. Grant CAP_PERFMON capability (kernel 5.8+):\n",
            "sudo setcap cap_perfmon=ep ./target/release/your_binary\n",
            "\n",
            "3. Lower perf_event_paranoid (system-wide, less secure):\n",
            "echo 2 | sudo tee /proc/sys/kernel/perf_event_paranoid\n",
            "\n",
            "Alternatively, the library will fall back to the standard timer with\n",
            "adaptive batching, which works for most cryptographic operations.",
        );

        match self {
            PerfError::UnsupportedPlatform => f.write_str("perf timing requires Linux"),
            PerfError::PermissionDenied => f.write_str(PERMISSION_HELP),
            PerfError::ConfigurationFailed(msg) => {
                f.write_str("perf configuration failed: ")?;
                f.write_str(msg)
            }
        }
    }
}
// Marker impl with no overridden methods: lets `PerfError` be used wherever a
// `std::error::Error` is expected (e.g. boxed as `Box<dyn std::error::Error>`).
impl std::error::Error for PerfError {}
/// Cycle-counting timer backed by a Linux `perf_event` hardware counter.
///
/// Constructed via `LinuxPerfTimer::new()`, which opens, enables and
/// calibrates the counter.
#[cfg(target_os = "linux")]
pub struct LinuxPerfTimer {
/// Counter built for `Hardware::CPU_CYCLES`; reset and read around each
/// measurement on the syscall-based path.
counter: ::perf_event2::Counter,
/// Calibrated cycles-per-nanosecond ratio, used to convert cycle counts to
/// nanoseconds.
cycles_per_ns: f64,
/// Optional mmap-based counter state (only with the `perf-mmap` feature).
/// When `Some`, measurements read the counter through it instead of `counter`.
#[cfg(feature = "perf-mmap")]
mmap_state: Option<MmapState>,
}
#[cfg(target_os = "linux")]
impl LinuxPerfTimer {
    /// Opens and enables a CPU-cycles hardware counter and calibrates it
    /// against the wall clock.
    ///
    /// With the `perf-mmap` feature enabled, an additional mmap-backed counter
    /// is attempted; failure to set it up is not an error and simply leaves
    /// the timer on the syscall-based path.
    ///
    /// # Errors
    ///
    /// * [`PerfError::PermissionDenied`] if opening the counter fails with
    ///   `std::io::ErrorKind::PermissionDenied`.
    /// * [`PerfError::ConfigurationFailed`] for any other failure to open the
    ///   counter, or if enabling it fails.
    pub fn new() -> Result<Self, PerfError> {
        use ::perf_event2::events::Hardware;
        use ::perf_event2::Builder;

        let mut counter = Builder::new(Hardware::CPU_CYCLES).build().map_err(|e| {
            if e.kind() == std::io::ErrorKind::PermissionDenied {
                PerfError::PermissionDenied
            } else {
                PerfError::ConfigurationFailed(format!("{:?}", e))
            }
        })?;
        counter
            .enable()
            .map_err(|e| PerfError::ConfigurationFailed(format!("Failed to enable: {:?}", e)))?;

        let cycles_per_ns = Self::calibrate(&mut counter);

        // Best effort: any mmap setup error is discarded and the syscall path is used.
        #[cfg(feature = "perf-mmap")]
        let mmap_state = Self::try_setup_mmap().ok();
        #[cfg(feature = "perf-mmap")]
        if mmap_state.is_some() {
            tracing::info!("perf-mmap enabled: using zero-overhead userspace PMU reads");
        }

        Ok(Self {
            counter,
            cycles_per_ns,
            #[cfg(feature = "perf-mmap")]
            mmap_state,
        })
    }

    /// Estimates how many counter cycles elapse per nanosecond.
    ///
    /// Runs up to `ROUNDS` busy-loop rounds of at least `MIN_SPIN_MICROS`
    /// microseconds each, computes `cycles / elapsed_ns` for every round that
    /// could be measured, and returns the median (the upper median when the
    /// number of samples is even). Rounds in which the counter cannot be reset
    /// or read, or that yield a zero reading, are skipped. If no round
    /// produces a sample, `FALLBACK_CYCLES_PER_NS` is returned.
    fn calibrate(counter: &mut ::perf_event2::Counter) -> f64 {
        use std::time::Instant;

        /// Number of calibration rounds attempted.
        const ROUNDS: usize = 10;
        /// Minimum busy-loop duration per round, in microseconds.
        const MIN_SPIN_MICROS: u128 = 1000;
        /// Ratio used when calibration yields no samples (3 cycles/ns, i.e. 3 GHz).
        const FALLBACK_CYCLES_PER_NS: f64 = 3.0;

        let mut ratios = Vec::with_capacity(ROUNDS);
        for _ in 0..ROUNDS {
            if counter.reset().is_err() {
                continue;
            }
            let start_time = Instant::now();

            // Busy work the optimizer cannot remove (`black_box`). The clock is
            // only consulted when the low 16 bits of the state are zero, which
            // keeps timer calls out of the vast majority of iterations.
            let mut dummy: u64 = 1;
            loop {
                dummy = dummy.wrapping_mul(6364136223846793005).wrapping_add(1);
                std::hint::black_box(dummy);
                if dummy & 0xFFFF == 0 && start_time.elapsed().as_micros() >= MIN_SPIN_MICROS {
                    break;
                }
            }

            let cycles = match counter.read() {
                Ok(c) => c,
                Err(_) => continue,
            };
            let elapsed_nanos = start_time.elapsed().as_nanos() as u64;
            // Guard against division by zero and against a counter that never ticked.
            if elapsed_nanos > 0 && cycles > 0 {
                ratios.push(cycles as f64 / elapsed_nanos as f64);
            }
        }

        if ratios.is_empty() {
            return FALLBACK_CYCLES_PER_NS;
        }
        ratios.sort_by(|a, b| a.total_cmp(b));
        ratios[ratios.len() / 2]
    }

    /// Runs `f` once and returns the number of counter cycles observed while
    /// it executed.
    ///
    /// When mmap-based state is available (`perf-mmap` feature), the counter
    /// is read through it before and after the call and the saturating
    /// difference is returned. Otherwise the counter is reset, `f` is run, and
    /// the counter value is read back.
    ///
    /// Compiler fences on both sides of the call keep the compiler from moving
    /// the measured work across the counter accesses, and the closure's result
    /// is passed through `black_box` so it is not optimized away.
    ///
    /// # Errors
    ///
    /// Returns `MeasurementError::SyscallFailed` if resetting or reading the
    /// counter fails; errors from the mmap read path are propagated unchanged.
    #[inline]
    pub fn measure_cycles<F, T>(&mut self, f: F) -> MeasurementResult
    where
        F: FnOnce() -> T,
    {
        #[cfg(feature = "perf-mmap")]
        if let Some(ref mmap) = self.mmap_state {
            let start = mmap.read_counter()?;
            compiler_fence(Ordering::SeqCst);
            std::hint::black_box(f());
            compiler_fence(Ordering::SeqCst);
            let end = mmap.read_counter()?;
            return Ok(end.saturating_sub(start));
        }

        if self.counter.reset().is_err() {
            return Err(MeasurementError::SyscallFailed);
        }
        compiler_fence(Ordering::SeqCst);
        std::hint::black_box(f());
        compiler_fence(Ordering::SeqCst);
        self.counter
            .read()
            .map_err(|_| MeasurementError::SyscallFailed)
    }

    /// Converts a cycle count to nanoseconds using the calibrated ratio.
    #[inline]
    pub fn cycles_to_ns(&self, cycles: u64) -> f64 {
        cycles as f64 / self.cycles_per_ns
    }

    /// Returns the calibrated cycles-per-nanosecond ratio.
    pub fn cycles_per_ns(&self) -> f64 {
        self.cycles_per_ns
    }

    /// Returns the duration of a single cycle in nanoseconds
    /// (`1 / cycles_per_ns`).
    pub fn resolution_ns(&self) -> f64 {
        1.0 / self.cycles_per_ns
    }

    /// Opens a dedicated CPU-cycles event via the raw `perf_event_open`
    /// syscall and wraps its file descriptor in an [`MmapState`].
    ///
    /// The event is opened for the calling thread (`pid = 0`) on the CPU the
    /// thread is currently running on, or on any CPU (`-1`) if the current CPU
    /// cannot be determined. Kernel and hypervisor cycles are excluded.
    ///
    /// # Errors
    ///
    /// Returns the OS error if `perf_event_open` fails, or the error reported
    /// by `MmapState::new`; in the latter case the descriptor is closed before
    /// returning.
    #[cfg(feature = "perf-mmap")]
    fn try_setup_mmap() -> Result<MmapState, Box<dyn std::error::Error>> {
        use perf_event_open_sys::bindings::{
            perf_event_attr, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE,
        };
        use std::os::unix::io::RawFd;

        // SAFETY: `perf_event_attr` is a plain C struct made of integers,
        // bitfields and unions of integers, so the all-zero bit pattern is a
        // valid value.
        let mut attr = unsafe { std::mem::zeroed::<perf_event_attr>() };
        attr.type_ = PERF_TYPE_HARDWARE;
        attr.size = std::mem::size_of::<perf_event_attr>() as u32;
        attr.config = PERF_COUNT_HW_CPU_CYCLES as u64;
        // NOTE(review): 0x3 presumably selects the ARM64 PMU `config1` bits for
        // a 64-bit counter plus userspace access (the success log below
        // mentions ARM64) -- confirm against the kernel PMU documentation.
        attr.__bindgen_anon_3.config1 = 0x3;
        attr.__bindgen_anon_1.sample_period = 0;
        attr.set_disabled(0);
        attr.set_exclude_kernel(1);
        attr.set_exclude_hv(1);

        // SAFETY: `sched_getcpu` takes no arguments and has no preconditions.
        let cpu_id = unsafe { libc::sched_getcpu() };
        // Any negative result means "unknown"; fall back to -1 (any CPU).
        let cpu: libc::c_int = if cpu_id < 0 { -1 } else { cpu_id };

        // `libc::syscall` is variadic, so nothing converts the arguments to
        // the types the kernel expects. Spell them out explicitly -- `flags`
        // in particular is `unsigned long`, not a 32-bit int.
        let pid: libc::pid_t = 0; // calling thread
        let group_fd: libc::c_int = -1; // no event group
        let flags: libc::c_ulong = 0;

        // SAFETY: `attr` is a fully initialized `perf_event_attr` that outlives
        // the call, and the remaining arguments have the types required by
        // perf_event_open(2).
        let fd = unsafe {
            libc::syscall(
                libc::SYS_perf_event_open,
                &attr as *const perf_event_attr,
                pid,
                cpu,
                group_fd,
                flags,
            )
        };
        if fd < 0 {
            let err = std::io::Error::last_os_error();
            tracing::debug!("perf_event_open for mmap failed: {}", err);
            return Err(err.into());
        }
        // A successful return value is a file descriptor and fits in `RawFd`.
        let fd = fd as RawFd;

        // SAFETY: `fd` is a perf event descriptor that was just opened
        // successfully and is not used anywhere else.
        // NOTE(review): presumably `MmapState::new` takes ownership of `fd` on
        // success and leaves it open on failure (hence the explicit close
        // below) -- confirm against `MmapState`.
        match unsafe { MmapState::new(fd) } {
            Ok(mmap_state) => {
                tracing::info!("ARM64 userspace PMU access enabled via mmap (perf-mmap feature)");
                Ok(mmap_state)
            }
            Err(e) => {
                tracing::debug!("mmap setup failed: {}", e);
                // SAFETY: `fd` is still open and owned by this function here.
                unsafe { libc::close(fd) };
                Err(Box::new(e))
            }
        }
    }
}
#[cfg(target_os = "linux")]
impl std::fmt::Debug for LinuxPerfTimer {
    /// Formats the timer as a struct showing only the calibrated
    /// `cycles_per_ns` ratio; the other fields are left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("LinuxPerfTimer");
        repr.field("cycles_per_ns", &self.cycles_per_ns);
        repr.finish()
    }
}
/// Placeholder `LinuxPerfTimer` for non-Linux targets.
///
/// It keeps the same type name available on every platform; its `new()`
/// always returns `PerfError::UnsupportedPlatform`.
#[cfg(not(target_os = "linux"))]
#[derive(Debug)]
pub struct LinuxPerfTimer {
// Private unit field: the struct cannot be built with a literal outside this module.
_private: (),
}
#[cfg(not(target_os = "linux"))]
impl LinuxPerfTimer {
    /// Always fails: perf-based timing is unavailable on this platform.
    ///
    /// # Errors
    ///
    /// Unconditionally returns `PerfError::UnsupportedPlatform`.
    pub fn new() -> Result<Self, PerfError> {
        let unsupported = PerfError::UnsupportedPlatform;
        Err(unsupported)
    }

    /// Never measures anything: the closure is dropped without being called
    /// and `MeasurementError::SyscallFailed` is returned.
    #[inline]
    pub fn measure_cycles<F, T>(&mut self, _f: F) -> MeasurementResult
    where
        F: FnOnce() -> T,
    {
        let failure = MeasurementError::SyscallFailed;
        Err(failure)
    }

    /// Returns the cycle count unchanged as an `f64` (one cycle is treated as
    /// one nanosecond).
    #[inline]
    pub fn cycles_to_ns(&self, cycles: u64) -> f64 {
        let nanos = cycles as f64;
        nanos
    }

    /// Fixed ratio of one cycle per nanosecond.
    pub fn cycles_per_ns(&self) -> f64 {
        1.0_f64
    }

    /// Fixed resolution of one nanosecond.
    pub fn resolution_ns(&self) -> f64 {
        1.0_f64
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Constructing the timer must either succeed with a usable calibration or
    /// fail with a Linux-appropriate error; which one happens depends on the
    /// privileges and PMU availability of the machine running the test.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_perf_timer_permissions() {
        match LinuxPerfTimer::new() {
            Ok(timer) => {
                // Calibration yields either a positive measured ratio or the
                // positive fallback, so both values must be positive and finite.
                assert!(timer.cycles_per_ns() > 0.0);
                assert!(timer.cycles_per_ns().is_finite());
                assert!(timer.resolution_ns() > 0.0);
                assert!(timer.resolution_ns().is_finite());
            }
            Err(err @ PerfError::PermissionDenied) => {
                // The help text must point the user at the relevant knob.
                assert!(err.to_string().contains("perf_event_paranoid"));
            }
            Err(err) => {
                // Other failures (e.g. no PMU in a VM) are tolerated, but the
                // Linux constructor must never report an unsupported platform.
                assert!(!matches!(err, PerfError::UnsupportedPlatform));
                assert!(!err.to_string().is_empty());
            }
        }
    }

    /// On non-Linux targets the stub constructor always reports that the
    /// platform is unsupported.
    #[test]
    #[cfg(not(target_os = "linux"))]
    fn test_perf_unsupported_platform() {
        assert!(matches!(
            LinuxPerfTimer::new(),
            Err(PerfError::UnsupportedPlatform)
        ));
    }
}