use super::{describe_perf_error, events_list, PerfCapability, PerfCounters};
use std::os::unix::io::RawFd;
// Numeric values below are copied from the kernel's perf UAPI
// (`linux/perf_event.h`); they are part of the syscall ABI and must not be
// changed independently of the kernel definitions.

/// Value stored in `PerfEventAttr::type_` to select the generic hardware event family.
const PERF_TYPE_HARDWARE: u32 = 0;
/// `PerfEventAttr::config` id for the CPU cycle counter (always opened).
const PERF_COUNT_HW_CPU_CYCLES: u64 = 0;
/// `PerfEventAttr::config` id for the retired instruction counter (always opened).
const PERF_COUNT_HW_INSTRUCTIONS: u64 = 1;
/// `PerfEventAttr::config` id for cache references (opened on a best-effort basis).
const PERF_COUNT_HW_CACHE_REFERENCES: u64 = 2;
/// `PerfEventAttr::config` id for cache misses (opened on a best-effort basis).
const PERF_COUNT_HW_CACHE_MISSES: u64 = 3;
/// `PerfEventAttr::config` id for backend-stalled cycles (opened on a best-effort basis).
const PERF_COUNT_HW_STALLED_CYCLES_BACKEND: u64 = 8;
/// Bit in `PerfEventAttr::flags`: `inherit` (set by `open_one`, not by `detect`).
const ATTR_INHERIT: u64 = 1 << 1;
/// Bit in `PerfEventAttr::flags`: `exclude_kernel`.
const ATTR_EXCLUDE_KERNEL: u64 = 1 << 5;
/// Bit in `PerfEventAttr::flags`: `exclude_hv`.
const ATTR_EXCLUDE_HV: u64 = 1 << 6;
/// Leading portion of the kernel's `struct perf_event_attr`, passed by pointer
/// to `perf_event_open`.
///
/// `#[repr(C)]` fixes the field order and widths declared here; the fields add
/// up to 72 bytes with no padding, and that size is what `PERF_ATTR_SIZE`
/// reports to the kernel. Do not reorder, resize or insert fields without
/// checking the kernel header.
/// NOTE(review): 72 bytes should correspond to the kernel's
/// `PERF_ATTR_SIZE_VER1` layout revision — confirm against `perf_event.h`
/// before extending the struct.
///
/// Every field not set explicitly by a caller is zero via `Default`.
#[repr(C)]
#[derive(Default, Debug, Clone, Copy)]
struct PerfEventAttr {
/// Event family; this file only ever stores `PERF_TYPE_HARDWARE`.
type_: u32,
/// Byte size of this struct as seen by the caller (`PERF_ATTR_SIZE`).
size: u32,
/// Event id within the family (one of the `PERF_COUNT_HW_*` constants).
config: u64,
/// Sampling period or frequency; left at zero by every caller in this file.
sample_period_or_freq: u64,
/// Sample record layout; left at zero by every caller in this file.
sample_type: u64,
/// Layout of `read()` results; left at zero, and `read_counter` expects
/// exactly one 8-byte value per read.
read_format: u64,
/// Bit flags; callers OR together the `ATTR_*` constants.
flags: u64,
/// Wakeup threshold; left at zero by every caller in this file.
wakeup_events_or_watermark: u32,
/// Breakpoint type; left at zero by every caller in this file.
bp_type: u32,
/// Breakpoint address or extra config word 1; left at zero here.
bp_addr_or_config1: u64,
/// Breakpoint length or extra config word 2; left at zero here.
bp_len_or_config2: u64,
}
/// Byte size of `PerfEventAttr`, stored in its `size` field before every
/// `perf_event_open` call. The `as u32` cast cannot truncate: the struct is a
/// few dozen bytes.
const PERF_ATTR_SIZE: u32 = std::mem::size_of::<PerfEventAttr>() as u32;
/// Raw `perf_event_open(2)` system call.
///
/// Forwards the five arguments unchanged to `libc::syscall` and returns its
/// result: a non-negative file descriptor on success, or a negative value with
/// `errno` set on failure.
///
/// # Safety
///
/// `attr` must point to a readable and writable, fully initialised
/// `PerfEventAttr` that stays valid for the duration of the call.
unsafe fn perf_event_open(
    attr: *mut PerfEventAttr,
    pid: libc::pid_t,
    cpu: libc::c_int,
    group_fd: libc::c_int,
    flags: libc::c_ulong,
) -> libc::c_long {
    let syscall_nr = libc::SYS_perf_event_open;
    // SAFETY: the caller guarantees `attr` is valid (see `# Safety`); the
    // remaining arguments are plain integers passed by value.
    unsafe { libc::syscall(syscall_nr, attr, pid, cpu, group_fd, flags) }
}
/// Set of perf counter file descriptors opened for one target pid, plus the
/// previous snapshot used to turn cumulative readings into deltas.
///
/// The descriptors are owned by this value and closed when it is dropped.
/// Each counter is opened as an independent event (no kernel-side event group
/// leader is used by `open_one`).
#[derive(Debug)]
pub struct PerfGroup {
/// Descriptor for the CPU cycle counter (mandatory).
cycles: RawFd,
/// Descriptor for the retired instruction counter (mandatory).
instructions: RawFd,
/// Descriptor for cache references, or `None` if it could not be opened.
cache_refs: Option<RawFd>,
/// Descriptor for cache misses, or `None` if it could not be opened.
cache_misses: Option<RawFd>,
/// Descriptor for backend-stalled cycles, or `None` if it could not be opened.
stalled_backend: Option<RawFd>,
/// Cumulative readings from the previous `sample_delta` call; `None` until
/// the first successful sample.
last: Option<PerfCounters>,
}
impl PerfGroup {
pub fn open(pid: libc::pid_t) -> Result<Self, String> {
let cycles = open_one(pid, PERF_COUNT_HW_CPU_CYCLES)?;
let instructions = open_one(pid, PERF_COUNT_HW_INSTRUCTIONS).inspect_err(|_| {
unsafe { libc::close(cycles) };
})?;
let cache_refs = open_one(pid, PERF_COUNT_HW_CACHE_REFERENCES).ok();
let cache_misses = open_one(pid, PERF_COUNT_HW_CACHE_MISSES).ok();
let stalled_backend = open_one(pid, PERF_COUNT_HW_STALLED_CYCLES_BACKEND).ok();
Ok(Self {
cycles,
instructions,
cache_refs,
cache_misses,
stalled_backend,
last: None,
})
}
pub fn opened_events(&self) -> Vec<String> {
events_list(
self.cache_refs.is_some(),
self.cache_misses.is_some(),
self.stalled_backend.is_some(),
)
}
pub fn sample_delta(&mut self) -> Option<PerfCounters> {
let cur = PerfCounters {
cycles: read_counter(self.cycles)?,
instructions: read_counter(self.instructions)?,
cache_refs: self.cache_refs.and_then(read_counter).unwrap_or(0),
cache_misses: self.cache_misses.and_then(read_counter).unwrap_or(0),
stalled_backend: self.stalled_backend.and_then(read_counter).unwrap_or(0),
};
let delta = match self.last {
Some(prev) => cur.delta_since(&prev),
None => PerfCounters::default(),
};
self.last = Some(cur);
Some(delta)
}
}
/// Closes every descriptor the group owns: the two mandatory counters first,
/// then whichever optional counters were opened.
impl Drop for PerfGroup {
    fn drop(&mut self) {
        let mandatory = [self.cycles, self.instructions];
        let optional = [self.cache_refs, self.cache_misses, self.stalled_backend];

        let owned_fds = mandatory
            .iter()
            .copied()
            .chain(optional.iter().copied().flatten());

        for fd in owned_fds {
            // SAFETY: each descriptor was opened by `PerfGroup::open`, is
            // owned exclusively by this value, and is closed exactly once
            // here. The return value is ignored: nothing useful can be done
            // about a failed close during drop.
            unsafe {
                libc::close(fd);
            }
        }
    }
}
/// Opens a single hardware counter for `pid` on any CPU.
///
/// The event counts user-space only (`exclude_kernel`, `exclude_hv`) and is
/// inherited by tasks the target creates after the counter is opened.
///
/// The descriptor is created close-on-exec so that it is not leaked into any
/// program this process later `exec`s.
///
/// # Errors
///
/// Returns the text produced by `describe_perf_error` for the OS error when
/// the kernel rejects the event.
fn open_one(pid: libc::pid_t, config: u64) -> Result<RawFd, String> {
    // `PERF_FLAG_FD_CLOEXEC` from `linux/perf_event.h` (Linux 3.14+): set
    // close-on-exec atomically at creation, avoiding the fork/exec race a
    // separate `fcntl` call would have.
    const PERF_FLAG_FD_CLOEXEC: libc::c_ulong = 1 << 3;

    let mut attr = PerfEventAttr {
        type_: PERF_TYPE_HARDWARE,
        size: PERF_ATTR_SIZE,
        config,
        flags: ATTR_INHERIT | ATTR_EXCLUDE_KERNEL | ATTR_EXCLUDE_HV,
        ..Default::default()
    };

    // cpu = -1: follow the target on whichever CPU it runs.
    // group_fd = -1: standalone event, no group leader.
    // SAFETY: `attr` is a live, fully initialised local for the whole call.
    let ret = unsafe { perf_event_open(&mut attr, pid, -1, -1, PERF_FLAG_FD_CLOEXEC) };
    if ret < 0 {
        // Must be read immediately, before anything else can clobber errno.
        let err = std::io::Error::last_os_error();
        return Err(describe_perf_error(&err));
    }

    // File descriptors are `c_int`-sized; the syscall wrapper merely widens
    // the result to `c_long`, so this narrowing cast cannot lose information.
    Ok(ret as RawFd)
}
/// Reads the current cumulative value of the counter behind `fd`.
///
/// Returns `None` unless the read delivers exactly eight bytes, which covers
/// both errors (negative return) and short reads. The bytes are interpreted
/// in native endianness.
fn read_counter(fd: RawFd) -> Option<u64> {
    let mut raw = [0u8; 8];
    let capacity = raw.len();
    // SAFETY: `raw` is a live, writable buffer of exactly `capacity` bytes.
    let bytes_read = unsafe { libc::read(fd, raw.as_mut_ptr().cast::<libc::c_void>(), capacity) };
    if bytes_read == 8 {
        Some(u64::from_ne_bytes(raw))
    } else {
        None
    }
}
/// Probes whether hardware perf counters can be opened at all.
///
/// Tries to open a user-space-only cycle counter on the calling task
/// (`pid = 0`, any CPU) and closes it again immediately. On success the
/// capability lists every event name from `events_list(true, true, true)`;
/// only the cycle counter is actually probed. On failure the capability
/// carries the text produced by `describe_perf_error`.
pub fn detect() -> PerfCapability {
    let mut probe = PerfEventAttr {
        flags: ATTR_EXCLUDE_KERNEL | ATTR_EXCLUDE_HV,
        config: PERF_COUNT_HW_CPU_CYCLES,
        size: PERF_ATTR_SIZE,
        type_: PERF_TYPE_HARDWARE,
        ..Default::default()
    };

    // SAFETY: `probe` is a live, fully initialised local for the whole call.
    let ret = unsafe { perf_event_open(&mut probe, 0, -1, -1, 0) };

    if ret >= 0 {
        // The descriptor was only needed as proof that opening works.
        // SAFETY: `ret` is a descriptor we just opened and nobody else holds.
        unsafe { libc::close(ret as i32) };
        return PerfCapability {
            available: true,
            events: events_list(true, true, true),
            reason: None,
        };
    }

    // Read errno straight after the failed syscall, before it can change.
    let os_err = std::io::Error::last_os_error();
    PerfCapability {
        available: false,
        events: vec![],
        reason: Some(describe_perf_error(&os_err)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test against the current process: open the counters, burn some
    /// instructions, and check that the instruction delta moved.
    ///
    /// Skips silently when perf is unavailable (containers, restrictive
    /// `perf_event_paranoid`) or when the counters report nothing at all.
    #[test]
    fn self_open_and_read() {
        let cap = detect();
        if !cap.available {
            eprintln!("perf unavailable: {:?}", cap.reason);
            return;
        }

        let mut group = PerfGroup::open(0).expect("open self perf group");
        // First sample only establishes the baseline.
        let _ = group.sample_delta();

        // Busy work; `black_box` keeps the optimiser from discarding it.
        let burned = (0..1_000_000u64).fold(0u64, |acc, i| acc.wrapping_add(i.wrapping_mul(7)));
        std::hint::black_box(burned);

        let d = group.sample_delta().expect("read deltas");
        let counters_silent = d.cycles == 0 && d.instructions == 0;
        if !counters_silent {
            assert!(
                d.instructions > 0,
                "no instruction delta but cycles={}",
                d.cycles
            );
        }
    }
}