use std::os::unix::io::RawFd;
use std::sync::atomic::{compiler_fence, Ordering};
use super::error::{MeasurementError, MeasurementResult};
/// Rust mirror of the kernel's `struct perf_event_mmap_page`, the control
/// page the kernel maps at the start of a `perf_event_open(2)` fd mapping.
///
/// Field order and sizes must match the C layout exactly (`#[repr(C)]`);
/// `data_head` sits at byte offset 1024 and the whole struct is 1088 bytes.
#[repr(C)]
#[allow(non_camel_case_types)] // keep the kernel's C name verbatim
pub struct perf_event_mmap_page {
    /// Version number of this structure.
    pub version: u32,
    /// Lowest version this structure is compatible with.
    pub compat_version: u32,
    /// Seqlock guarding the fields below: odd while the kernel updates them.
    pub lock: u32,
    /// Hardware counter identifier; 0 means the event is not on a counter
    /// (see the `index == 0` bail-out in the reader).
    pub index: u32,
    /// Kernel-maintained offset to add to the raw hardware counter value.
    pub offset: i64,
    /// Time the event has been enabled.
    pub time_enabled: u64,
    /// Time the event has actually been running (differs under multiplexing).
    pub time_running: u64,
    /// Capability bitfield; bit 2 is `cap_user_rdpmc`.
    pub capabilities: u64,
    /// Bit width of the hardware counter value.
    pub pmc_width: u16,
    /// Timestamp conversion parameters (shift/mult/offset/zero).
    pub time_shift: u16,
    pub time_mult: u32,
    pub time_offset: u64,
    pub time_zero: u64,
    /// Size of this structure's header portion.
    pub size: u32,
    pub reserved_1: u32,
    pub time_cycles: u64,
    pub time_mask: u64,
    /// Padding (116 * 8 bytes) so `data_head` lands at offset 1024.
    pub reserved: [u8; 928],
    /// Ring-buffer bookkeeping.
    pub data_head: u64,
    pub data_tail: u64,
    pub data_offset: u64,
    pub data_size: u64,
    /// AUX-area bookkeeping.
    pub aux_head: u64,
    pub aux_tail: u64,
    pub aux_offset: u64,
    pub aux_size: u64,
}
/// Owns the mmap of the perf event control page plus a typed pointer into it.
pub struct MmapState {
    // Never read directly (hence the underscore); held only to keep the
    // mapping alive for as long as `page_ptr` may be dereferenced.
    _mmap: memmap2::MmapRaw,
    // Points at the start of `_mmap`, which the kernel lays out as a
    // `perf_event_mmap_page`. Valid exactly as long as `_mmap` lives.
    page_ptr: *const perf_event_mmap_page,
}
impl MmapState {
    /// Maps the perf event control page for `fd` and verifies that the
    /// kernel permits userspace counter reads.
    ///
    /// # Safety
    ///
    /// `fd` must be a live file descriptor obtained from
    /// `perf_event_open(2)`; the first page of such an fd's mapping is a
    /// `perf_event_mmap_page`. The returned `MmapState` dereferences that
    /// page on every counter read.
    ///
    /// # Errors
    ///
    /// Propagates the mmap failure, or returns `ErrorKind::Unsupported`
    /// when `cap_user_rdpmc` is not set in the page's capability bits.
    pub unsafe fn new(fd: RawFd) -> Result<Self, std::io::Error> {
        // Map exactly one page: only the control page is needed here
        // (no ring-buffer data pages are requested).
        let pagesize = libc::sysconf(libc::_SC_PAGESIZE) as usize;
        let mmap = memmap2::MmapOptions::new().len(pagesize).map_raw(fd)?;
        let page_ptr = mmap.as_ptr() as *const perf_event_mmap_page;
        // `capabilities` is fixed once the page exists, so a plain
        // (non-seqlock) read is sufficient for this check.
        let caps = (*page_ptr).capabilities;
        // Bit 2 of the capability bitfield = cap_user_rdpmc (userspace may
        // read the PMU directly).
        const CAP_USER_RDPMC: u64 = 1 << 2;
        if (caps & CAP_USER_RDPMC) == 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::Unsupported,
                "Userspace PMU access not enabled (cap_user_rdpmc == 0). \
                Check: sudo sh -c 'echo 1 > /proc/sys/kernel/perf_user_access'",
            ));
        }
        Ok(Self {
            _mmap: mmap,
            page_ptr,
        })
    }

    /// Reads the current counter value, retrying the seqlock snapshot until
    /// it succeeds or the retry budget is exhausted.
    ///
    /// The first `FAST_RETRIES` attempts spin without yielding (a failed
    /// read usually means the kernel was mid-update for a few cycles);
    /// beyond that, each failed attempt yields the thread to the scheduler.
    ///
    /// # Errors
    ///
    /// `MeasurementError::RetryExhausted` after `MAX_RETRIES` failed reads.
    #[inline]
    pub fn read_counter(&self) -> MeasurementResult {
        const FAST_RETRIES: usize = 1000;
        const MAX_RETRIES: usize = 50_000;
        for attempt in 0..MAX_RETRIES {
            if let Some(val) = unsafe { self.try_read_counter() } {
                return Ok(val);
            }
            // Past the fast-spin phase: be polite to the scheduler; long
            // streaks of failures suggest contention or multiplexing rather
            // than a transient kernel update.
            if attempt >= FAST_RETRIES {
                std::thread::yield_now();
            }
        }
        tracing::error!(
            "perf_mmap seqlock retry exhausted after {} attempts - \
            system under extreme load or PMU constantly multiplexed",
            MAX_RETRIES
        );
        Err(MeasurementError::RetryExhausted)
    }

    /// One attempt at the perf mmap seqlock read protocol.
    ///
    /// Returns `None` when the snapshot is inconsistent (kernel wrote the
    /// page mid-read) or the event is not currently on a hardware counter;
    /// the caller is expected to retry.
    #[inline]
    unsafe fn try_read_counter(&self) -> Option<u64> {
        let page = &*self.page_ptr;
        // Seqlock begin: an odd sequence number means the kernel is in the
        // middle of updating the page.
        let seq = atomic_load(&page.lock, Ordering::Acquire);
        if (seq & 1) != 0 {
            return None;
        }
        // Compiler barrier between the lock read and the data reads.
        // NOTE(review): only compiler fences (no hardware barriers) are
        // used here — this assumes the kernel updates the page from the
        // task's own CPU; confirm that holds for this event configuration.
        compiler_fence(Ordering::SeqCst);
        let index = read_once!(page.index);
        let offset = read_once!(page.offset);
        let pmc_width = read_once!(page.pmc_width);
        // index == 0: the event is not scheduled on a hardware counter at
        // this instant (e.g. multiplexed out) — nothing to read.
        if index == 0 {
        return None; }
        // Defensive: a zero or oversized width would break sign extension.
        if pmc_width == 0 || pmc_width > 64 {
        return None; }
        // Read the raw hardware counter. NOTE(review): this always reads
        // PMCCNTR_EL0 (the cycle counter) and ignores which counter `index`
        // designates — valid only if the event is the CPU cycle counter.
        let pmc_value = mrs_pmccntr_el0();
        compiler_fence(Ordering::SeqCst);
        // Seqlock end: a changed sequence number means the kernel wrote the
        // page while we were reading — discard this snapshot.
        let nseq = atomic_load(&page.lock, Ordering::Acquire);
        if seq != nseq {
        return None; }
        Some(compute_counter(offset, pmc_value, pmc_width))
    }
}
// SAFETY: `MmapState` holds a raw pointer derived from the owned `_mmap`,
// whose lifetime matches `self`, and only ever reads through it (volatile
// loads guarded by the seqlock in `try_read_counter`). NOTE(review):
// userspace never writes through `page_ptr`, so cross-thread sharing looks
// sound, but the volatile loads are not formally atomic — worth confirming
// under Miri or with a loom-style model if this is shared heavily.
unsafe impl Send for MmapState {}
unsafe impl Sync for MmapState {}
/// Reads the raw ARM cycle counter register (`PMCCNTR_EL0`) via `mrs`.
///
/// # Safety
///
/// Userspace access to `PMCCNTR_EL0` must be enabled by the kernel
/// (which `MmapState::new` checks via `cap_user_rdpmc`); otherwise the
/// instruction traps — presumably delivered as SIGILL, verify on target.
#[cfg(target_arch = "aarch64")]
#[inline(always)]
unsafe fn mrs_pmccntr_el0() -> u64 {
    let val: u64;
    // `nomem` is sound: a system-register read touches no memory. The asm
    // is still volatile (no `pure` option), so the compiler cannot cache or
    // merge repeated reads of the counter.
    std::arch::asm!(
        "mrs {}, pmccntr_el0",
        out(reg) val,
        options(nostack, nomem, preserves_flags),
    );
    val
}

/// Non-aarch64 stub so the module compiles everywhere; the seqlock reader
/// is only meaningful on aarch64, so reaching this is a logic error.
#[cfg(not(target_arch = "aarch64"))]
#[inline(always)]
unsafe fn mrs_pmccntr_el0() -> u64 {
    unreachable!("mrs_pmccntr_el0 should only be called on aarch64")
}
/// Sign-extends the low `width` bits of `val` into a full `i64`.
///
/// The kernel reports hardware counters as `width`-bit quantities
/// (`pmc_width` in the mmap control page); the raw register value must be
/// sign-extended before being combined with the kernel-maintained offset.
///
/// Out-of-range widths are handled defensively instead of panicking:
/// `width == 0` yields 0, and `width >= 64` returns the value unchanged
/// (reinterpreted as `i64`). The previous unconditional `64 - width` shift
/// overflowed — panicking in debug builds — for `width == 0`.
#[inline]
fn sign_extend(val: u64, width: u16) -> i64 {
    if width >= 64 {
        // Full word already; nothing to extend. (Matches the old width==64
        // behavior, where the shift amount was 0.)
        return val as i64;
    }
    if width == 0 {
        // Degenerate width: avoid the 64-bit shift overflow below.
        return 0;
    }
    let shift = 64 - u32::from(width);
    // Shift the sign bit up to bit 63, then arithmetic-shift back down.
    ((val << shift) as i64) >> shift
}

/// Reconstructs the full counter value per the perf mmap protocol:
/// `count = offset + sign_extend(raw_pmc, width)`, using wrapping
/// arithmetic to match the kernel's modular u64 counter semantics.
#[inline]
fn compute_counter(offset: i64, pmc_value: u64, width: u16) -> u64 {
    offset.wrapping_add(sign_extend(pmc_value, width)) as u64
}
/// Loads `*ptr` with volatile semantics.
///
/// NOTE(review): despite the name and signature, this is a volatile read,
/// not an atomic load — the `Ordering` argument is ignored and serves only
/// as documentation at call sites. Callers pair these reads with
/// `compiler_fence`, mirroring the kernel's seqlock pattern for the perf
/// control page; there is no stable generic atomic load in std to swap in
/// without changing this interface.
///
/// # Safety
///
/// `ptr` must be non-null, properly aligned, and valid for reads of `T`.
#[inline]
unsafe fn atomic_load<T: Copy>(ptr: *const T, _order: Ordering) -> T {
    std::ptr::read_volatile(ptr)
}
/// Volatile read of a place expression — analogue of the kernel's
/// `READ_ONCE`: prevents the compiler from caching, tearing, or eliding the
/// load of a field shared with the kernel.
macro_rules! read_once {
    ($expr:expr) => {
        std::ptr::read_volatile(&$expr)
    };
}
// Re-exporting the macro as an item puts it in the module's namespace, so
// it resolves anywhere in the crate regardless of the textual
// definition-before-use rule for `macro_rules!` (it is invoked above, in
// `try_read_counter`).
pub(crate) use read_once;
#[cfg(test)]
mod tests {
    use super::*;

    // Narrow widths: exercises the shift path far from the 64-bit edge.
    #[test]
    fn test_sign_extend_8bit() {
        assert_eq!(sign_extend(0xFF, 8), -1);
        assert_eq!(sign_extend(0x80, 8), -128);
        assert_eq!(sign_extend(0x7F, 8), 127);
    }

    #[test]
    fn test_sign_extend_16bit() {
        assert_eq!(sign_extend(0xFFFF, 16), -1);
        assert_eq!(sign_extend(0x8000, 16), -32768);
        assert_eq!(sign_extend(0x7FFF, 16), 32767);
    }

    #[test]
    fn test_sign_extend_32bit() {
        assert_eq!(sign_extend(0xFFFFFFFF, 32), -1);
        assert_eq!(sign_extend(0x80000000, 32), -2147483648);
        assert_eq!(sign_extend(0x7FFFFFFF, 32), 2147483647);
        assert_eq!(sign_extend(0, 32), 0);
    }

    // 40 bits is a realistic non-power-of-two pmc_width.
    #[test]
    fn test_sign_extend_40bit() {
        assert_eq!(sign_extend(0xFFFFFFFFFF, 40), -1);
        assert_eq!(sign_extend(0x8000000000, 40), -549755813888);
        assert_eq!(sign_extend(0x7FFFFFFFFF, 40), 549755813887);
        assert_eq!(sign_extend(0, 40), 0);
    }

    // Full-width counter: shift amount is zero, value passes through.
    #[test]
    fn test_sign_extend_64bit() {
        assert_eq!(sign_extend(0xFFFFFFFFFFFFFFFF, 64), -1);
        assert_eq!(sign_extend(0x8000000000000000, 64), -9223372036854775808);
        assert_eq!(sign_extend(0, 64), 0);
    }

    #[test]
    fn test_compute_counter() {
        assert_eq!(compute_counter(0, 0x1000, 32), 0x1000);
        assert_eq!(compute_counter(100, 0x1000, 32), 100 + 0x1000);
        assert_eq!(
            compute_counter(-100, 0x1000, 32),
            (0x1000u64).wrapping_sub(100)
        );
        assert_eq!(
            compute_counter(0, 0xFFFFFFFF, 32),
            0xFFFFFFFFFFFFFFFF
        );
    }

    // Offset exactly cancels the raw value: result must be exactly zero.
    #[test]
    fn test_compute_counter_exact_cancellation() {
        assert_eq!(compute_counter(-0x1000, 0x1000, 32), 0);
    }

    // A negative raw counter (top bit of the width set) is subtracted from
    // the kernel offset after sign extension.
    #[test]
    fn test_compute_counter_negative_raw() {
        assert_eq!(compute_counter(0x10000, 0xFFFF, 16), 0x10000 - 1);
    }
}