#![cfg(target_os = "linux")]
use std::io;
use std::os::fd::{AsRawFd, FromRawFd, OwnedFd};
use perf_event_open_sys as pes;
use pes::bindings::{
PERF_COUNT_HW_BRANCH_MISSES, PERF_COUNT_HW_CACHE_MISSES, PERF_COUNT_HW_CPU_CYCLES,
PERF_COUNT_HW_INSTRUCTIONS, PERF_FORMAT_TOTAL_TIME_ENABLED, PERF_FORMAT_TOTAL_TIME_RUNNING,
PERF_TYPE_HARDWARE, perf_event_attr,
};
/// One reading of the hardware counters opened for a single thread.
///
/// Produced by [`VcpuPerfCounters::read`]. The four event counts come from
/// four separately opened perf events, each read with its own `read(2)` call,
/// so they are not sampled at exactly the same instant.
#[derive(Debug, Clone, Copy, Default, serde::Serialize, serde::Deserialize)]
#[non_exhaustive]
pub struct VcpuPerfSample {
/// Value of the `PERF_COUNT_HW_CPU_CYCLES` event.
pub cycles: u64,
/// Value of the `PERF_COUNT_HW_INSTRUCTIONS` event.
pub instructions: u64,
/// Value of the `PERF_COUNT_HW_CACHE_MISSES` event.
pub cache_misses: u64,
/// Value of the `PERF_COUNT_HW_BRANCH_MISSES` event.
pub branch_misses: u64,
/// `PERF_FORMAT_TOTAL_TIME_ENABLED` word reported by the cycles event.
/// The same word from the other three events is discarded by
/// [`VcpuPerfCounters::read`].
pub time_enabled_ns: u64,
/// `PERF_FORMAT_TOTAL_TIME_RUNNING` word reported by the cycles event.
/// A value below `time_enabled_ns` presumably means the event was
/// multiplexed off the PMU for part of the interval (see perf_event_open(2)).
pub time_running_ns: u64,
}
impl VcpuPerfSample {
    /// Instructions per cycle for this sample.
    ///
    /// Returns `0.0` when no cycles were counted, so an idle sample never
    /// yields a division by zero (NaN or infinity).
    pub fn ipc(&self) -> f64 {
        match self.cycles {
            0 => 0.0,
            cycles => self.instructions as f64 / cycles as f64,
        }
    }
}
/// The set of perf event file descriptors opened for one thread.
///
/// Each field owns one descriptor returned by `perf_event_open`; because they
/// are `OwnedFd`s, they are closed when this value is dropped.
pub struct VcpuPerfCounters {
/// Descriptor for the `PERF_COUNT_HW_CPU_CYCLES` event.
cycles: OwnedFd,
/// Descriptor for the `PERF_COUNT_HW_INSTRUCTIONS` event.
instructions: OwnedFd,
/// Descriptor for the `PERF_COUNT_HW_CACHE_MISSES` event.
cache_misses: OwnedFd,
/// Descriptor for the `PERF_COUNT_HW_BRANCH_MISSES` event.
branch_misses: OwnedFd,
}
impl VcpuPerfCounters {
    /// Opens the four hardware events (cycles, instructions, cache misses,
    /// branch misses) for the thread identified by `tid`.
    ///
    /// # Errors
    ///
    /// Returns the OS error of the first event that fails to open. Events
    /// opened before the failure are closed again when their `OwnedFd`s drop.
    pub fn open(tid: libc::pid_t) -> io::Result<Self> {
        // Field initialisers run in the order written, so the events are
        // opened in the order cycles, instructions, cache misses, branch misses.
        Ok(Self {
            cycles: open_one(tid, PERF_COUNT_HW_CPU_CYCLES as u64)?,
            instructions: open_one(tid, PERF_COUNT_HW_INSTRUCTIONS as u64)?,
            cache_misses: open_one(tid, PERF_COUNT_HW_CACHE_MISSES as u64)?,
            branch_misses: open_one(tid, PERF_COUNT_HW_BRANCH_MISSES as u64)?,
        })
    }

    /// Reads all four events and bundles them into a [`VcpuPerfSample`].
    ///
    /// The enabled/running times in the sample are the ones reported by the
    /// cycles event; the timing words of the other three events are dropped.
    ///
    /// # Errors
    ///
    /// Returns the first read error encountered, including a short read.
    pub fn read(&self) -> io::Result<VcpuPerfSample> {
        let (cycles, time_enabled_ns, time_running_ns) = read_one(&self.cycles)?;
        let instructions = read_one(&self.instructions)?.0;
        let cache_misses = read_one(&self.cache_misses)?.0;
        let branch_misses = read_one(&self.branch_misses)?.0;

        let sample = VcpuPerfSample {
            cycles,
            instructions,
            cache_misses,
            branch_misses,
            time_enabled_ns,
            time_running_ns,
        };
        Ok(sample)
    }
}
/// Opens a single `PERF_TYPE_HARDWARE` event for thread `tid`.
///
/// `config` selects the hardware event (one of the `PERF_COUNT_HW_*` ids).
/// The event starts enabled, follows the thread on any CPU (`cpu == -1`), is
/// its own group leader (`group_fd == -1`), and is configured with
/// `exclude_host` so that, per perf_event_open(2), only guest-mode execution
/// is counted. Reads return three `u64` words: value, time enabled, time
/// running.
///
/// The descriptor is created with `PERF_FLAG_FD_CLOEXEC` so it is not
/// inherited by programs this process later `exec`s.
///
/// # Errors
///
/// Returns the OS error reported by `perf_event_open` (for example when the
/// caller lacks permission or the event is unsupported).
fn open_one(tid: libc::pid_t, config: u64) -> io::Result<OwnedFd> {
    let mut attr = perf_event_attr {
        size: std::mem::size_of::<perf_event_attr>() as u32,
        type_: PERF_TYPE_HARDWARE,
        config,
        read_format: (PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING) as u64,
        ..Default::default()
    };
    // The zero-valued setters restate the defaults on purpose: they record
    // which bits this module relies on being off.
    attr.set_disabled(0);
    attr.set_exclude_host(1);
    attr.set_exclude_user(0);
    attr.set_exclude_kernel(0);
    attr.set_exclude_hv(0);
    attr.set_pinned(0);

    // Close-on-exec keeps the counter from leaking into child programs.
    let flags = pes::bindings::PERF_FLAG_FD_CLOEXEC as libc::c_ulong;

    // SAFETY: `attr` is a fully initialised `perf_event_attr` whose `size`
    // field matches the struct, and it outlives the call.
    let fd = unsafe { pes::perf_event_open(&mut attr, tid, -1, -1, flags) };
    if fd < 0 {
        return Err(io::Error::last_os_error());
    }
    // SAFETY: `fd` is non-negative, was just returned by the kernel, and is
    // not owned by anything else, so `OwnedFd` may take sole ownership.
    Ok(unsafe { OwnedFd::from_raw_fd(fd) })
}
/// Reads one perf event and returns `(value, time_enabled, time_running)`.
///
/// Expects the descriptor to deliver exactly three `u64` words per read,
/// which is the layout produced by the `read_format` that `open_one` requests.
///
/// # Errors
///
/// Returns the OS error if `read(2)` fails, or an `UnexpectedEof` error if
/// fewer than 24 bytes were returned.
fn read_one(fd: &OwnedFd) -> io::Result<(u64, u64, u64)> {
    let mut words = [0u64; 3];
    let expected = std::mem::size_of_val(&words);

    // SAFETY: `words` is a live, writable buffer of exactly `expected` bytes,
    // and `fd` is a valid descriptor for as long as the borrow lasts.
    let n = unsafe {
        libc::read(
            fd.as_raw_fd(),
            words.as_mut_ptr().cast::<libc::c_void>(),
            expected,
        )
    };

    // A negative return is the only value that does not fit in `usize`.
    let got = match usize::try_from(n) {
        Ok(got) => got,
        Err(_) => return Err(io::Error::last_os_error()),
    };
    if got < expected {
        let msg = format!("short read on perf fd: got {n} bytes, expected {expected}");
        return Err(io::Error::new(io::ErrorKind::UnexpectedEof, msg));
    }

    let [value, time_enabled, time_running] = words;
    Ok((value, time_enabled, time_running))
}
/// Perf counters for a set of threads, one [`VcpuPerfCounters`] per thread.
pub struct PerfCountersCapture {
/// Counter sets in the same order as the thread ids passed to
/// [`PerfCountersCapture::open`].
pub per_vcpu: Vec<VcpuPerfCounters>,
}
impl PerfCountersCapture {
    /// Opens a counter set for every thread id in `tids`, preserving order.
    ///
    /// # Errors
    ///
    /// Stops at the first thread whose counters cannot be opened and returns
    /// that error; counter sets opened so far are dropped (and thus closed).
    pub fn open(tids: &[libc::pid_t]) -> io::Result<Self> {
        let per_vcpu = tids
            .iter()
            .map(|&tid| VcpuPerfCounters::open(tid))
            .collect::<io::Result<Vec<_>>>()?;
        Ok(Self { per_vcpu })
    }

    /// Reads every counter set and returns the samples in `per_vcpu` order.
    ///
    /// # Errors
    ///
    /// Returns the first read error; samples gathered before it are discarded.
    pub fn read_all(&self) -> io::Result<Vec<VcpuPerfSample>> {
        let mut samples = Vec::with_capacity(self.per_vcpu.len());
        for counters in &self.per_vcpu {
            samples.push(counters.read()?);
        }
        Ok(samples)
    }
}
// Unit tests for the perf counter wrappers and for `VcpuPerfSample::ipc`.
// The `claim`-based tests rely on `crate::assert::Verdict` and `crate::claim!`,
// which are defined elsewhere in the crate.
#[cfg(test)]
mod tests {
use super::*;
// Opens counters on the calling thread, burns some CPU, and checks that the
// values read back are internally consistent. If `perf_event_open` fails
// (for example due to permissions), the test logs the error and returns
// without asserting anything.
// NOTE(review): `open_one` sets `exclude_host`, so the counts for a plain
// host thread are presumably zero; the assertions below only check
// consistency, not that any events were counted.
#[test]
fn open_self_then_read_returns_consistent_fields() {
// Thread id of the current thread, obtained via the raw `gettid` syscall.
let tid = unsafe { libc::syscall(libc::SYS_gettid) } as libc::pid_t;
let counters = match VcpuPerfCounters::open(tid) {
Ok(c) => c,
Err(e) => {
eprintln!("perf_event_open unavailable in this env: {e}; skipping");
return;
}
};
// Busy work; the volatile read and `black_box` keep the optimizer from
// removing the loop.
let mut acc: u64 = 0;
for i in 0u64..1_000_000 {
unsafe { std::ptr::read_volatile(&i) };
acc = acc.wrapping_add(i);
}
std::hint::black_box(acc);
let sample = counters.read().expect("read perf counters");
// An event cannot have been running for longer than it was enabled.
assert!(
sample.time_running_ns <= sample.time_enabled_ns,
"time_running ({}) > time_enabled ({})",
sample.time_running_ns,
sample.time_enabled_ns,
);
// Sanity bound: a value with the top bit set would suggest a garbage read.
assert!(sample.cycles < (1u64 << 63));
}
// A default (all-zero) sample must report an IPC of exactly 0.0.
#[test]
fn ipc_zero_when_cycles_zero() {
let s = VcpuPerfSample::default();
assert_eq!(s.ipc(), 0.0);
}
// 100 instructions over 200 cycles gives an IPC of 0.5.
#[test]
fn ipc_computes_instructions_over_cycles() {
let s = VcpuPerfSample {
cycles: 200,
instructions: 100,
..Default::default()
};
assert!((s.ipc() - 0.5).abs() < 1e-9);
}
// An IPC of 1.5 must satisfy both an `at_least(1.0)` and an `is_finite()` claim.
#[test]
fn vcpu_perf_sample_ipc_productive_claim_passes() {
use crate::assert::Verdict;
let s = VcpuPerfSample {
cycles: 1_000,
instructions: 1_500,
..Default::default()
};
let mut v = Verdict::new();
let ipc = s.ipc();
crate::claim!(v, ipc).at_least(1.0);
crate::claim!(v, ipc).is_finite();
let r = v.into_result();
assert!(
r.passed,
"productive IPC=1.5 must satisfy at_least(1.0): {:?}",
r.details,
);
}
// An all-zero sample has IPC 0.0, which must fail `at_least(1.0)`; the
// failure message is expected to name both the threshold and the label.
// NOTE(review): the "ipc" label apparently comes from `claim!` stringifying
// its argument, so the local variable name matters here -- confirm against
// the macro definition before renaming.
#[test]
fn vcpu_perf_sample_idle_ipc_fails_productive_claim() {
use crate::assert::Verdict;
let s = VcpuPerfSample::default();
let ipc = s.ipc();
let mut v = Verdict::new();
crate::claim!(v, ipc).at_least(1.0);
let r = v.into_result();
assert!(!r.passed, "idle vCPU's ipc=0 must fail at_least(1.0)");
let msg = &r.details[0].message;
assert!(msg.contains("at least 1"), "msg must name threshold: {msg}");
assert!(msg.contains("ipc"), "msg must include the label: {msg}");
}
// A sample whose running time is below its enabled time must fail an
// equality claim between the two, and the message must show both values.
#[test]
fn vcpu_perf_sample_multiplex_detect_via_eq_claim() {
use crate::assert::Verdict;
let s = VcpuPerfSample {
cycles: 500,
instructions: 800,
cache_misses: 10,
branch_misses: 2,
time_enabled_ns: 1_000_000,
time_running_ns: 600_000,
};
let mut v = Verdict::new();
crate::claim!(v, s.time_running_ns).eq(s.time_enabled_ns);
let r = v.into_result();
assert!(!r.passed, "multiplexed sample must fail eq claim");
let msg = &r.details[0].message;
assert!(
msg.contains("expected 1000000"),
"msg must reflect expected value: {msg}",
);
assert!(
msg.contains("was 600000"),
"msg must reflect observed value: {msg}",
);
}
}