use std::collections::HashMap;
use std::os::fd::OwnedFd;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use vmm_sys_util::ioctl_io_nr;
ioctl_io_nr!(KVM_GET_STATS_FD, kvm_bindings::KVMIO, 0xce);
const DESC_FIXED_SIZE: usize = 16;
fn get_stats_fd<F: AsRawFd>(fd: &F) -> Option<OwnedFd> {
let ret = unsafe { libc::ioctl(fd.as_raw_fd(), KVM_GET_STATS_FD()) };
if ret < 0 {
None
} else {
Some(unsafe { OwnedFd::from_raw_fd(ret) })
}
}
#[derive(Debug, Clone)]
struct StatDesc {
name: String,
size: usize,
offset: usize,
}
#[derive(Debug)]
struct StatsMeta {
data_offset: usize,
data_size: usize,
descs: Vec<StatDesc>,
}
fn read_initial(fd: RawFd) -> Option<Vec<u8>> {
let mut buf = Vec::with_capacity(8192);
let mut chunk = [0u8; 4096];
loop {
let n = unsafe { libc::read(fd, chunk.as_mut_ptr() as *mut _, chunk.len()) };
if n < 0 {
return None;
}
if n == 0 {
break;
}
buf.extend_from_slice(&chunk[..n as usize]);
}
if buf.len() < 24 {
return None;
}
Some(buf)
}
fn parse_meta_from_buf(buf: &[u8]) -> Option<StatsMeta> {
let name_size = u32::from_ne_bytes(buf[4..8].try_into().ok()?) as usize;
let num_desc = u32::from_ne_bytes(buf[8..12].try_into().ok()?) as usize;
let desc_offset = u32::from_ne_bytes(buf[16..20].try_into().ok()?) as usize;
let data_offset = u32::from_ne_bytes(buf[20..24].try_into().ok()?) as usize;
let desc_stride = DESC_FIXED_SIZE + name_size;
let mut descs = Vec::with_capacity(num_desc);
for i in 0..num_desc {
let d_start = desc_offset + i * desc_stride;
if d_start + desc_stride > buf.len() {
break;
}
let d = &buf[d_start..d_start + desc_stride];
let size = u16::from_ne_bytes(d[6..8].try_into().ok()?) as usize;
let offset = u32::from_ne_bytes(d[8..12].try_into().ok()?) as usize;
let name_bytes = &d[DESC_FIXED_SIZE..DESC_FIXED_SIZE + name_size];
let name_end = name_bytes.iter().position(|&b| b == 0).unwrap_or(name_size);
let name = String::from_utf8_lossy(&name_bytes[..name_end]).to_string();
descs.push(StatDesc { name, size, offset });
}
let data_size = buf.len().saturating_sub(data_offset);
Some(StatsMeta {
data_offset,
data_size,
descs,
})
}
fn pread_data(fd: RawFd, meta: &StatsMeta) -> Option<Vec<u8>> {
let mut buf = vec![0u8; meta.data_size];
let n = unsafe {
libc::pread(
fd,
buf.as_mut_ptr() as *mut _,
meta.data_size,
meta.data_offset as libc::off_t,
)
};
if n < 0 {
return None;
}
buf.truncate(n as usize);
Some(buf)
}
fn extract_stats(meta: &StatsMeta, data: &[u8]) -> HashMap<String, u64> {
let mut map = HashMap::with_capacity(meta.descs.len());
for desc in &meta.descs {
if desc.size != 1 {
continue;
}
let off = desc.offset;
if off + 8 > data.len() {
continue;
}
let val = u64::from_ne_bytes(data[off..off + 8].try_into().unwrap());
map.insert(desc.name.clone(), val);
}
map
}
struct VcpuStatsReader {
fd: OwnedFd,
meta: StatsMeta,
}
impl VcpuStatsReader {
fn open<F: AsRawFd>(vcpu: &F) -> Option<Self> {
let fd = get_stats_fd(vcpu)?;
let buf = read_initial(fd.as_raw_fd())?;
let meta = parse_meta_from_buf(&buf)?;
Some(VcpuStatsReader { fd, meta })
}
fn read_snapshot(&self) -> HashMap<String, u64> {
let data = match pread_data(self.fd.as_raw_fd(), &self.meta) {
Some(d) => d,
None => return HashMap::new(),
};
extract_stats(&self.meta, &data)
}
}
pub(crate) struct StatsContext {
readers: Vec<VcpuStatsReader>,
}
impl StatsContext {
pub(crate) fn read_stats(&self) -> crate::vmm::KvmStatsTotals {
let per_vcpu = self.readers.iter().map(|r| r.read_snapshot()).collect();
crate::vmm::KvmStatsTotals { per_vcpu }
}
}
pub(crate) fn open_stats_context(vcpus: &[kvm_ioctls::VcpuFd]) -> Option<StatsContext> {
let mut readers = Vec::with_capacity(vcpus.len());
for vcpu in vcpus {
readers.push(VcpuStatsReader::open(vcpu)?);
}
Some(StatsContext { readers })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::vmm::topology::Topology;
use crate::vmm::x86_64::kvm::KtstrKvm;
#[test]
fn stats_fd_returns_some_on_modern_kernel() {
let topo = Topology {
sockets: 1,
cores_per_socket: 1,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(fd) = get_stats_fd(&vm.vm_fd) {
assert!(fd.as_raw_fd() >= 0);
}
}
#[test]
fn vcpu_stats_fd_returns_some() {
let topo = Topology {
sockets: 1,
cores_per_socket: 1,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(fd) = get_stats_fd(&vm.vcpus[0]) {
assert!(fd.as_raw_fd() >= 0);
}
}
#[test]
fn parse_meta_from_vcpu() {
let topo = Topology {
sockets: 1,
cores_per_socket: 1,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(fd) = get_stats_fd(&vm.vcpus[0]) {
let buf = read_initial(fd.as_raw_fd()).unwrap();
let meta = parse_meta_from_buf(&buf).unwrap();
assert!(!meta.descs.is_empty());
assert!(meta.descs.iter().any(|d| d.name == "exits"));
}
}
#[test]
fn pread_data_after_initial_read() {
let topo = Topology {
sockets: 1,
cores_per_socket: 1,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(fd) = get_stats_fd(&vm.vcpus[0]) {
let buf = read_initial(fd.as_raw_fd()).unwrap();
let meta = parse_meta_from_buf(&buf).unwrap();
let data = pread_data(fd.as_raw_fd(), &meta);
assert!(data.is_some(), "pread should work after initial read");
assert!(!data.unwrap().is_empty());
}
}
#[test]
fn vcpu_reader_opens_and_reads() {
let topo = Topology {
sockets: 1,
cores_per_socket: 2,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(reader) = VcpuStatsReader::open(&vm.vcpus[0]) {
let snap = reader.read_snapshot();
assert!(snap.contains_key("exits"), "should contain 'exits'");
}
}
#[test]
fn stats_context_opens_and_reads() {
let topo = Topology {
sockets: 1,
cores_per_socket: 2,
threads_per_core: 1,
};
let vm = KtstrKvm::new(topo, 64, false).unwrap();
if let Some(ctx) = open_stats_context(&vm.vcpus) {
assert_eq!(ctx.readers.len(), 2);
let stats = ctx.read_stats();
assert_eq!(stats.per_vcpu.len(), 2);
}
}
#[test]
fn interesting_stats_are_trust_relevant() {
use crate::vmm::KVM_INTERESTING_STATS;
assert!(KVM_INTERESTING_STATS.contains(&"exits"));
assert!(KVM_INTERESTING_STATS.contains(&"halt_exits"));
assert!(KVM_INTERESTING_STATS.contains(&"halt_successful_poll"));
assert!(KVM_INTERESTING_STATS.contains(&"halt_attempted_poll"));
assert!(KVM_INTERESTING_STATS.contains(&"halt_wait_ns"));
assert!(KVM_INTERESTING_STATS.contains(&"signal_exits"));
assert!(KVM_INTERESTING_STATS.contains(&"hypercalls"));
assert!(KVM_INTERESTING_STATS.contains(&"preemption_reported"));
}
#[test]
fn kvm_stats_totals_serde_roundtrip() {
use crate::vmm::KvmStatsTotals;
let mut totals = KvmStatsTotals::default();
let mut m = HashMap::new();
m.insert("exits".to_string(), 1000u64);
m.insert("halt_exits".to_string(), 200u64);
totals.per_vcpu = vec![m];
let json = serde_json::to_string(&totals).unwrap();
let loaded: KvmStatsTotals = serde_json::from_str(&json).unwrap();
assert_eq!(loaded.per_vcpu.len(), 1);
assert_eq!(loaded.avg("exits"), 1000);
assert_eq!(loaded.avg("halt_exits"), 200);
}
#[test]
fn sum_and_avg() {
use crate::vmm::KvmStatsTotals;
let mut d = KvmStatsTotals::default();
let mut m1 = HashMap::new();
m1.insert("exits".to_string(), 100u64);
let mut m2 = HashMap::new();
m2.insert("exits".to_string(), 200u64);
d.per_vcpu = vec![m1, m2];
assert_eq!(d.sum("exits"), 300);
assert_eq!(d.avg("exits"), 150);
assert_eq!(d.sum("nonexistent"), 0);
assert_eq!(d.avg("nonexistent"), 0);
}
#[test]
fn avg_empty() {
use crate::vmm::KvmStatsTotals;
let d = KvmStatsTotals::default();
assert_eq!(d.avg("exits"), 0);
}
}