#![allow(unexpected_cfgs)]
use std::error::Error;
use std::time::Instant;
/// Logs a warning at *error* level so it is visible even when warnings are
/// filtered out, prefixing the message with `[WARNING] `.
///
/// Takes a literal format string followed by any arguments, which are passed
/// through to `error!` untouched.
macro_rules! really_warn {
    ($fmt:literal $($args:tt)*) => {
        error!(concat!("[WARNING] ", $fmt) $($args)*)
    };
}
pub enum Counter {
WallTime(WallTime),
Instructions(Instructions),
InstructionsMinusIrqs(InstructionsMinusIrqs),
InstructionsMinusRaw0420(InstructionsMinusRaw0420),
}
impl Counter {
pub fn by_name(name: &str) -> Result<Self, Box<dyn Error + Send + Sync>> {
Ok(match name {
WallTime::NAME => Counter::WallTime(WallTime::new()),
Instructions::NAME => Counter::Instructions(Instructions::new()?),
InstructionsMinusIrqs::NAME => {
Counter::InstructionsMinusIrqs(InstructionsMinusIrqs::new()?)
}
InstructionsMinusRaw0420::NAME => {
Counter::InstructionsMinusRaw0420(InstructionsMinusRaw0420::new()?)
}
_ => return Err(format!("{:?} is not a valid counter name", name).into()),
})
}
pub(super) fn describe_as_json(&self) -> String {
let (name, units) = match self {
Counter::WallTime(_) => (
WallTime::NAME,
r#"[["ns", 1], ["μs", 1000], ["ms", 1000000], ["s", 1000000000]]"#,
),
Counter::Instructions(_) => (Instructions::NAME, r#"[["instructions", 1]]"#),
Counter::InstructionsMinusIrqs(_) => {
(InstructionsMinusIrqs::NAME, r#"[["instructions", 1]]"#)
}
Counter::InstructionsMinusRaw0420(_) => {
(InstructionsMinusRaw0420::NAME, r#"[["instructions", 1]]"#)
}
};
format!(r#"{{ "name": "{}", "units": {} }}"#, name, units)
}
#[inline]
pub(super) fn since_start(&self) -> u64 {
match self {
Counter::WallTime(counter) => counter.since_start(),
Counter::Instructions(counter) => counter.since_start(),
Counter::InstructionsMinusIrqs(counter) => counter.since_start(),
Counter::InstructionsMinusRaw0420(counter) => counter.since_start(),
}
}
}
/// Counter that measures elapsed wall-clock time.
pub struct WallTime {
    /// Moment at which the counter was created.
    start: Instant,
}

impl WallTime {
    /// Name under which this counter is selected and reported.
    const NAME: &'static str = "wall-time";

    /// Starts a new wall-time counter at the current instant.
    pub fn new() -> Self {
        let start = Instant::now();
        Self { start }
    }

    /// Nanoseconds elapsed since the counter was created.
    ///
    /// The 128-bit nanosecond count is truncated to `u64`.
    #[inline]
    fn since_start(&self) -> u64 {
        let elapsed = self.start.elapsed();
        elapsed.as_nanos() as u64
    }
}
/// Counter that reports the hardware instruction count since creation.
pub struct Instructions {
    /// Underlying hardware instruction counter.
    instructions: hw::Counter,
    /// Raw counter value sampled when this counter was created.
    start: u64,
}

impl Instructions {
    /// Name under which this counter is selected and reported.
    const NAME: &'static str = "instructions:u";

    /// Detects the CPU, opens the hardware instruction counter, and records
    /// its current value as the baseline.
    ///
    /// # Errors
    ///
    /// Propagates any failure from CPU detection or counter setup.
    pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
        let instructions =
            hw::Counter::new(&hw::CpuModel::detect()?, HwCounterType::Instructions)?;
        let start = instructions.read();
        Ok(Self {
            instructions,
            start,
        })
    }

    /// Instructions counted since creation (wrapping difference from the
    /// baseline).
    #[inline]
    fn since_start(&self) -> u64 {
        let now = self.instructions.read();
        now.wrapping_sub(self.start)
    }
}
pub struct InstructionsMinusIrqs {
instructions: hw::Counter,
irqs: hw::Counter,
start: u64,
}
impl InstructionsMinusIrqs {
const NAME: &'static str = "instructions-minus-irqs:u";
pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
let model = hw::CpuModel::detect()?;
let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?;
let irqs = hw::Counter::new(&model, HwCounterType::Irqs)?;
let (start_instructions, start_irqs) = (&instructions, &irqs).read();
let start = start_instructions.wrapping_sub(start_irqs);
Ok(InstructionsMinusIrqs {
instructions,
irqs,
start,
})
}
#[inline]
fn since_start(&self) -> u64 {
let (instructions, irqs) = (&self.instructions, &self.irqs).read();
instructions.wrapping_sub(irqs).wrapping_sub(self.start)
}
}
pub struct InstructionsMinusRaw0420(InstructionsMinusIrqs);
impl InstructionsMinusRaw0420 {
const NAME: &'static str = "instructions-minus-r0420:u";
pub fn new() -> Result<Self, Box<dyn Error + Send + Sync>> {
let model = hw::CpuModel::detect()?;
let instructions = hw::Counter::new(&model, HwCounterType::Instructions)?;
let irqs = hw::Counter::new(&model, HwCounterType::Raw0420)?;
let (start_instructions, start_irqs) = (&instructions, &irqs).read();
let start = start_instructions.wrapping_sub(start_irqs);
Ok(InstructionsMinusRaw0420(InstructionsMinusIrqs {
instructions,
irqs,
start,
}))
}
#[inline]
fn since_start(&self) -> u64 {
self.0.since_start()
}
}
/// Reading interface for hardware counters.
///
/// Implemented in `hw` both for a single `Counter` (yielding one `u64`) and
/// for a pair of counter references (yielding `(u64, u64)`).
trait HwCounterRead {
/// Value produced by one read.
type Output;
/// Samples the current counter value(s).
fn read(&self) -> Self::Output;
}
/// Selects which hardware event `hw::Counter::new` should count.
enum HwCounterType {
/// The generic hardware "instructions" event.
Instructions,
/// A raw event whose encoding is chosen per CPU model by
/// `hw::CpuModel::irqs_counter_config`.
Irqs,
/// The raw event `0x0420`; `hw::Counter::new` warns when it is requested
/// on anything other than a known AMD Zen CPU.
Raw0420,
}
/// Suffix appended to warnings and errors about unexpected or unsupported
/// CPUs, asking the user to file an issue.
const BUG_REPORT_MSG: &str =
"please report this to https://github.com/rust-lang/measureme/issues/new";
#[cfg(all(target_arch = "x86_64", target_os = "linux", not(target_env = "ohos")))]
mod hw {
use memmap2::{Mmap, MmapOptions};
use perf_event_open_sys::{bindings::*, perf_event_open};
use std::arch::asm;
use std::convert::TryInto;
use std::error::Error;
use std::fs;
use std::mem;
use std::os::unix::io::FromRawFd;
/// A single perf event whose hardware counter register is read directly
/// with `rdpmc`.
pub(super) struct Counter {
/// Memory mapping of the perf event file, accessed as a
/// `perf_event_mmap_page`.
mmap: Mmap,
/// Register index handed to `rdpmc`; set to the mmap page's `index` minus
/// one once the event has been opened.
reg_idx: u32,
}
impl Counter {
/// Opens the hardware counter described by `counter_type` for the detected
/// CPU `model`.
///
/// Requesting `Raw0420` on anything other than a known AMD Zen CPU logs a
/// warning but still proceeds.
///
/// # Errors
///
/// Fails if the model has no known IRQ event encoding (for `Irqs`) or if
/// the perf event cannot be opened.
pub(super) fn new(
    model: &CpuModel,
    counter_type: super::HwCounterType,
) -> Result<Self, Box<dyn Error + Send + Sync>> {
    let (event_type, event_config) = match counter_type {
        super::HwCounterType::Instructions => (PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS),
        super::HwCounterType::Irqs => {
            let raw_event = model.irqs_counter_config()?;
            (PERF_TYPE_RAW, raw_event)
        }
        super::HwCounterType::Raw0420 => {
            let is_known_zen = matches!(model, CpuModel::Amd(AmdGen::Zen));
            if !is_known_zen {
                really_warn!(
                    "Counter::new: the undocumented `r0420` performance \
                     counter has only been observed on AMD Zen CPUs"
                );
            }
            (PERF_TYPE_RAW, 0x04_20)
        }
    };
    Self::with_type_and_hw_id(event_type, event_config)
}
/// Opens a perf event of the given `type_` / `hw_id`, maps its metadata
/// page, and validates that the counter can be read from userspace.
///
/// # Errors
///
/// Returns an error if:
/// - `perf_event_open` fails (the OS error taken from `errno` is included),
/// - the event file cannot be memory-mapped,
/// - the mmap page does not advertise `cap_user_rdpmc`,
/// - no hardware register was allocated to the event (`index == 0`), or
/// - the hardware counter is not 48 bits wide.
///
/// # Panics
///
/// Panics if `size_of::<perf_event_attr>()` does not fit the `size` field.
fn with_type_and_hw_id(
    type_: perf_type_id,
    hw_id: u32,
) -> Result<Self, Box<dyn Error + Send + Sync>> {
    let mut attrs = perf_event_attr {
        size: mem::size_of::<perf_event_attr>().try_into().unwrap(),
        type_,
        config: hw_id.into(),
        ..perf_event_attr::default()
    };

    // Per perf_event_open(2): `pid == 0` with `cpu == -1` measures the
    // calling thread on any CPU; `group_fd == -1` creates a standalone event.
    let pid = 0;
    let cpu = -1;
    let group_fd = -1;
    // Count userspace only: exclude kernel and hypervisor activity.
    attrs.set_exclude_kernel(1);
    attrs.set_exclude_hv(1);

    // SAFETY: `attrs` is a fully initialized `perf_event_attr` that outlives
    // the call, and a non-negative return value is a freshly opened file
    // descriptor that nothing else owns, so `File` may take ownership of it.
    let file = unsafe {
        let fd = perf_event_open(&mut attrs, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC.into());
        if fd < 0 {
            // The syscall wrapper signals failure by returning -1 and leaves
            // the real cause in `errno`; negating the return value would
            // report every failure as errno 1 (EPERM).
            Err(std::io::Error::last_os_error())
        } else {
            Ok(fs::File::from_raw_fd(fd))
        }
    };
    let file = file.map_err(|e| format!("perf_event_open failed: {:?}", e))?;

    // SAFETY: the mapping covers exactly one `perf_event_mmap_page` of the
    // perf event file opened above.
    let mmap = unsafe {
        MmapOptions::new()
            .len(mem::size_of::<perf_event_mmap_page>())
            .map(&file)
    };
    let mmap = mmap.map_err(|e| format!("perf_event_mmap_page: mmap failed: {:?}", e))?;

    // `reg_idx` is a placeholder until `index` has been read from the page.
    let mut counter = Counter { mmap, reg_idx: 0 };

    let (version, compat_version, caps, index, pmc_width) = counter
        .access_mmap_page_with_seqlock(|mp| {
            (
                mp.version,
                mp.compat_version,
                // SAFETY: reads the capability bitfield view of the union.
                unsafe { mp.__bindgen_anon_1.__bindgen_anon_1 },
                mp.index,
                mp.pmc_width,
            )
        });

    info!(
        "Counter::new: version={} compat_version={} index={:#x}",
        version, compat_version, index,
    );

    if caps.cap_user_rdpmc() == 0 {
        return Err(format!(
            "perf_event_mmap_page: missing cap_user_rdpmc{}",
            if caps.cap_bit0_is_deprecated() == 0 && caps.cap_bit0() == 1 {
                " (ignoring legacy/broken rdpmc support)"
            } else {
                ""
            }
        )
        .into());
    }

    // `index` is one-based; zero means no hardware register is assigned.
    if index == 0 {
        return Err("perf_event_mmap_page: no allocated hardware register (ran out?)".into());
    }
    counter.reg_idx = index - 1;

    // The `read` implementations hard-code a 48-bit counter width.
    if (cfg!(not(accurate_seqlock_rdpmc)) || true) && pmc_width != 48 {
        return Err(format!(
            "perf_event_mmap_page: {}-bit hardware counter found, only 48-bit supported",
            pmc_width
        )
        .into());
    }

    Ok(counter)
}
/// Runs `attempt` against the mapped `perf_event_mmap_page` and returns its
/// result once the page's `lock` field is observed to be the same before and
/// after the attempt; otherwise the attempt is repeated.
///
/// NOTE(review): this relies on the kernel changing `lock` whenever it
/// updates the page — confirm against perf_event_open(2).
#[inline]
fn access_mmap_page_with_seqlock<T>(
&self,
attempt: impl Fn(&perf_event_mmap_page) -> T,
) -> T {
// Reinterpret the start of the mapping as the perf metadata page.
let mmap_page = unsafe { &*(self.mmap.as_ptr() as *const perf_event_mmap_page) };
// Acquire fence placed between the `lock` loads and the field reads.
let barrier = || std::sync::atomic::fence(std::sync::atomic::Ordering::Acquire);
loop {
// Snapshot the sequence value before touching any other field.
let seq_lock = mmap_page.lock;
barrier();
let result = attempt(mmap_page);
barrier();
// Unchanged sequence value: accept the result; otherwise retry.
if mmap_page.lock == seq_lock {
return result;
}
}
}
}
/// Single-counter read: one serialized `rdpmc`, sign-extended from the
/// counter width to 64 bits.
impl super::HwCounterRead for Counter {
    type Output = u64;

    #[inline]
    fn read(&self) -> u64 {
        // The seqlock-based path is kept compiled but permanently disabled by
        // the `&& false`; the fast path assumes offset 0 and a 48-bit counter.
        let (raw, offset, pmc_width) = if cfg!(accurate_seqlock_rdpmc) && false {
            self.access_mmap_page_with_seqlock(|page| {
                let caps = unsafe { page.__bindgen_anon_1.__bindgen_anon_1 };
                assert_ne!(caps.cap_user_rdpmc(), 0);
                let reg_idx = page.index.checked_sub(1).unwrap();
                (rdpmc(reg_idx), page.offset, page.pmc_width)
            })
        } else {
            (rdpmc(self.reg_idx), 0, 48)
        };

        let total = offset + (raw as i64);
        // Shift the value's top bit up to bit 63 and arithmetically back down
        // to sign-extend it.
        let unused_bits = 64 - pmc_width;
        ((total << unused_bits) >> unused_bits) as u64
    }
}
/// Paired read: both counters are sampled back-to-back after a single
/// serialization point, then each is sign-extended from 48 bits.
impl super::HwCounterRead for (&Counter, &Counter) {
    type Output = (u64, u64);

    #[inline]
    fn read(&self) -> (u64, u64) {
        // Fallback to two independent reads; permanently disabled by `&& false`.
        if (cfg!(accurate_seqlock_rdpmc) || cfg!(unserialized_rdpmc)) && false {
            return (self.0.read(), self.1.read());
        }

        // 64 minus the assumed 48-bit counter width.
        const UNUSED_BITS: u32 = 64 - 48;
        let sign_extend = |raw: u64| (((raw as i64) << UNUSED_BITS) >> UNUSED_BITS) as u64;

        let (first, second) = *self;
        let (first_raw, second_raw) = rdpmc_pair(first.reg_idx, second.reg_idx);
        (sign_extend(first_raw), sign_extend(second_raw))
    }
}
/// Reads hardware counter register `reg_idx`, first running
/// `serialize_instruction_execution`.
///
/// The unserialized variant is kept behind a condition that is always false.
#[inline(always)]
fn rdpmc(reg_idx: u32) -> u64 {
    let skip_serialization = cfg!(unserialized_rdpmc) && false;
    if !skip_serialization {
        serialize_instruction_execution();
    }
    unserialized_rdpmc(reg_idx)
}
/// Reads two hardware counter registers back-to-back after a single call to
/// `serialize_instruction_execution`, returning `(a, b)` in argument order.
#[inline(always)]
fn rdpmc_pair(a_reg_idx: u32, b_reg_idx: u32) -> (u64, u64) {
    serialize_instruction_execution();
    let a_value = unserialized_rdpmc(a_reg_idx);
    let b_value = unserialized_rdpmc(b_reg_idx);
    (a_value, b_value)
}
/// Executes `cpuid` with `eax` zeroed and discards every register result.
///
/// As the name indicates, the instruction is run for its serializing effect
/// before `rdpmc` reads, not for the values it returns.
#[inline(always)]
fn serialize_instruction_execution() {
unsafe {
asm!(
// Zero `eax`, stash `rbx` in a scratch register, run `cpuid`, then
// restore `rbx` from the scratch register.
// NOTE(review): `rbx` is presumably saved by hand because it cannot be
// listed as an asm operand — confirm against the Rust inline asm rules.
"xor %eax, %eax", "mov %rbx, {tmp_rbx:r}", "cpuid",
"mov {tmp_rbx:r}, %rbx", tmp_rbx = lateout(reg) _,
// Remaining registers written by `cpuid`; their values are discarded.
lateout("eax") _,
lateout("edx") _,
lateout("ecx") _,
options(nostack),
options(att_syntax),
);
}
}
/// Executes a bare `rdpmc` for register `reg_idx` and returns the 64-bit
/// value assembled from its two 32-bit halves.
///
/// Unlike `rdpmc`, no `cpuid` is issued first.
#[inline(always)]
fn unserialized_rdpmc(reg_idx: u32) -> u64 {
let (lo, hi): (u32, u32);
unsafe {
asm!(
"rdpmc",
// Counter selector goes in `ecx`; the result comes back in `edx:eax`.
in("ecx") reg_idx,
lateout("eax") lo,
lateout("edx") hi,
options(nostack),
options(att_syntax),
);
}
// Combine the halves: `hi` forms bits 32..64, `lo` bits 0..32.
lo as u64 | (hi as u64) << 32
}
/// CPU vendor together with a coarse microarchitecture generation, as
/// classified by `CpuModel::detect`.
pub(super) enum CpuModel {
/// Vendor string `AuthenticAMD`.
Amd(AmdGen),
/// Vendor string `GenuineIntel`.
Intel(IntelGen),
}
/// AMD microarchitecture buckets used to pick event encodings.
pub(super) enum AmdGen {
/// Families 15 through 22 (except 19) in `CpuModel::detect`.
PreZen,
/// Family 23 models explicitly listed in `CpuModel::detect`.
Zen,
/// Any other family >= 23 CPU; treated like Zen after a warning.
UnknownMaybeZenLike,
}
/// Intel microarchitecture buckets used to pick event encodings.
pub(super) enum IntelGen {
/// Everything classified as older than Sandy Bridge (also the Atom and
/// Knights models listed in `CpuModel::detect`).
PreBridge,
/// Sandy Bridge and Ivy Bridge.
Bridge,
/// Haswell and Broadwell.
Well,
/// Skylake, Kaby Lake, Coffee Lake and Cascade Lake.
Lake,
/// Any unlisted model; treated like Skylake after a warning.
UnknownMaybeLakeLike,
}
impl CpuModel {
/// Identifies the running CPU from `cpuid` and classifies it into a
/// `CpuModel`.
///
/// Known models are logged at info level; unknown AMD (family >= 23) and
/// Intel models are accepted with a warning and mapped to the
/// `UnknownMaybe*Like` generation. On Zen-like AMD CPUs an extra probe warns
/// if an atomic instruction changes the raw event `0x0825` counter.
///
/// # Errors
///
/// Fails if the vendor string is not valid UTF-8, if the vendor is neither
/// `AuthenticAMD` nor `GenuineIntel`, or if an AMD family of 0..=14 or 19 is
/// reported.
pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> {
// Leaf 0 carries the vendor string, leaf 1 the family/model signature.
let cpuid0 = unsafe { std::arch::x86_64::__cpuid(0) };
let cpuid1 = unsafe { std::arch::x86_64::__cpuid(1) };
// The 12-byte vendor string is assembled from `ebx`, `edx`, `ecx`, in that order.
let mut vendor = [0; 12];
vendor[0..4].copy_from_slice(&cpuid0.ebx.to_le_bytes());
vendor[4..8].copy_from_slice(&cpuid0.edx.to_le_bytes());
vendor[8..12].copy_from_slice(&cpuid0.ecx.to_le_bytes());
let vendor = std::str::from_utf8(&vendor).map_err(|_| {
format!(
"cpuid returned non-UTF-8 vendor name: cpuid(0)={:?} cpuid(1)={:?}",
cpuid0, cpuid1
)
})?;
let version = cpuid1.eax;
// Base family is bits 8..12; the extended family (bits 20..28) is added
// only when the base family is 15.
let mut family = (version >> 8) & 0xf;
if family == 15 {
family += (version >> 20) & 0xff;
}
// Base model is bits 4..8; the extended model (bits 16..20) supplies the
// high nibble for family >= 15, and for Intel family 6.
let mut model = (version >> 4) & 0xf;
if family >= 15 || vendor == "GenuineIntel" && family == 6 {
model += ((version >> 16) & 0xf) << 4;
}
info!(
"CpuModel::detect: vendor={:?} family={} model={}",
vendor, family, model
);
match vendor {
"AuthenticAMD" => {
use self::AmdGen::*;
// An empty `name` marks an unrecognized CPU.
let (gen, name) = match (family, model) {
// Families this code treats as impossible; reported as an error.
(0..=14, _) | (19, _) => {
return Err(format!(
"impossible AMD64 CPU detected (Family {} Model {}); {}",
family,
model,
super::BUG_REPORT_MSG
)
.into());
}
(15, _) => (PreZen, "K8 (Hammer)"),
(16, _) => (PreZen, "K10 (Barcelona/Shanghai/Istanbul)"),
(17, _) => (PreZen, "K8+K10 hybrid (Turion X2 Ultra)"),
(18, _) => (PreZen, "Fusion"),
(20, _) => (PreZen, "Bobcat"),
(21, _) => (PreZen, "Bulldozer / Piledriver / Steamroller / Excavator"),
(22, _) => (PreZen, "Jaguar / Puma"),
(23, 1) => (Zen, "Zen (Naples/Whitehaven/Summit Ridge/Snowy Owl)"),
(23, 17) => (Zen, "Zen (Raven Ridge)"),
(23, 24) => (Zen, "Zen (Banded Kestrel/Dali) / Zen+ (Picasso)"),
(23, 8) => (Zen, "Zen+ (Pinnacle Ridge)"),
(23, 49) => (Zen, "Zen 2 (Rome/Castle Peak)"),
(23, 113) => (Zen, "Zen 2 (Matisse)"),
// Catch-all: must stay after the specific family 23 arms above.
(23..=0xffff_ffff, _) => {
really_warn!(
"CpuModel::detect: unknown AMD CPU (Family {} Model {}), \
assuming Zen-like; {}",
family,
model,
super::BUG_REPORT_MSG
);
(UnknownMaybeZenLike, "")
}
};
if !name.is_empty() {
info!("CpuModel::detect: known AMD CPU: {}", name);
}
if matches!(gen, Zen | UnknownMaybeZenLike) {
// Best-effort probe: skipped silently if the raw event cannot be opened.
// NOTE(review): 0x0825 is presumably the SpecLockMap commit event, going
// by the variable name — confirm against AMD documentation.
if let Ok(spec_lock_map_commit) =
Counter::with_type_and_hw_id(PERF_TYPE_RAW, 0x08_25)
{
use super::HwCounterRead;
let start_spec_lock_map_commit = spec_lock_map_commit.read();
let mut atomic: u64 = 0;
let mut _tmp: u64 = 0;
// Execute one locked read-modify-write between the two counter reads.
unsafe {
asm!(
"lock xadd {tmp}, ({atomic})",
atomic = in(reg) &mut atomic,
tmp = inout(reg) _tmp,
options(att_syntax),
);
}
// Any change in the counter across the atomic triggers the warning.
if spec_lock_map_commit.read() != start_spec_lock_map_commit {
really_warn!(
"CpuModel::detect: SpecLockMap detected, in AMD {} CPU; \
this may add some non-deterministic noise - \
for information on disabling SpecLockMap, see \
https://github.com/mozilla/rr/wiki/Zen",
name
);
}
}
}
Ok(CpuModel::Amd(gen))
}
"GenuineIntel" => {
use self::IntelGen::*;
// Arms are tried top to bottom, so the broad `(6, 0..=41)` range and the
// final catch-all only apply to models not matched by another arm.
let (gen, name) = match (family, model) {
(0..=5, _) => (PreBridge, ""),
(15, _) => (PreBridge, "Netburst"),
(6, 0..=41) => (PreBridge, ""),
(6, 87) => (PreBridge, "Knights Landing"),
(6, 133) => (PreBridge, "Knights Mill"),
(6, 53) | (6, 54) => (PreBridge, "Saltwell"),
(6, 55) | (6, 74) | (6, 77) | (6, 90) | (6, 93) => {
(PreBridge, "Silvermont")
}
(6, 76) => (PreBridge, "Airmont (Cherry Trail/Braswell)"),
(6, 44) => (PreBridge, "Westmere (Gulftown/EP)"),
(6, 46) => (PreBridge, "Nehalem (EX)"),
(6, 47) => (PreBridge, "Westmere (EX)"),
(6, 42) => (Bridge, "Sandy Bridge (M/H)"),
(6, 45) => (Bridge, "Sandy Bridge (E/EN/EP)"),
(6, 58) => (Bridge, "Ivy Bridge (M/H/Gladden)"),
(6, 62) => (Bridge, "Ivy Bridge (E/EN/EP/EX)"),
(6, 60) => (Well, "Haswell (S)"),
(6, 61) => (Well, "Broadwell (U/Y/S)"),
(6, 63) => (Well, "Haswell (E/EP/EX)"),
(6, 69) => (Well, "Haswell (ULT)"),
(6, 70) => (Well, "Haswell (GT3e)"),
(6, 71) => (Well, "Broadwell (H/C/W)"),
(6, 79) => (Well, "Broadwell (E/EP/EX)"),
(6, 86) => (Well, "Broadwell (DE/Hewitt Lake)"),
(6, 78) => (Lake, "Skylake (Y/U)"),
(6, 85) => (Lake, "Skylake (SP/X/DE/W) / Cascade Lake (SP/X/W)"),
(6, 94) => (Lake, "Skylake (DT/H/S)"),
(6, 142) => (Lake, "Kaby Lake (Y/U) / Coffee Lake (U)"),
(6, 158) => (Lake, "Kaby Lake (DT/H/S/X) / Coffee Lake (S/H/E)"),
// Anything not listed above is assumed to behave like Skylake.
(6..=14, _) | (16..=0xffff_ffff, _) => {
really_warn!(
"CpuModel::detect: unknown Intel CPU (Family {} Model {}), \
assuming Skylake-like; {}",
family,
model,
super::BUG_REPORT_MSG
);
(UnknownMaybeLakeLike, "")
}
};
if !name.is_empty() {
info!("CpuModel::detect: known Intel CPU: {}", name);
}
Ok(CpuModel::Intel(gen))
}
// Any other vendor string is unsupported.
_ => Err(format!(
"cpuid returned unknown CPU vendor {:?}; version={:#x}",
vendor, version
)
.into()),
}
}
/// Returns the raw perf event encoding used to count IRQs on this CPU model.
///
/// # Errors
///
/// Fails for Intel CPUs classified as `PreBridge`, for which no encoding is
/// known here.
fn irqs_counter_config(&self) -> Result<u32, Box<dyn Error + Send + Sync>> {
    let config = match self {
        CpuModel::Amd(AmdGen::PreZen) => 0x00_cf,
        CpuModel::Amd(AmdGen::Zen | AmdGen::UnknownMaybeZenLike) => 0x00_2c,
        CpuModel::Intel(IntelGen::PreBridge) => {
            return Err(format!(
                "counting IRQs not yet supported on Intel CPUs \
                 predating Sandy Bridge; {}",
                super::BUG_REPORT_MSG
            )
            .into());
        }
        CpuModel::Intel(
            IntelGen::Bridge
            | IntelGen::Well
            | IntelGen::Lake
            | IntelGen::UnknownMaybeLakeLike,
        ) => 0x01_cb,
    };
    Ok(config)
}
}
}
#[cfg(not(all(target_arch = "x86_64", target_os = "linux", not(target_env = "ohos"))))]
mod hw {
    //! Stand-in for platforms without hardware counter support.
    //!
    //! `Counter` and `CpuModel` are uninhabited, so no value of either type
    //! can exist; `CpuModel::detect` always returns an error explaining which
    //! platform requirement is not met.

    use std::error::Error;

    /// Uninhabited placeholder for a hardware counter.
    pub(super) enum Counter {}

    impl Counter {
        /// Can never actually run: it needs a `CpuModel`, which has no values.
        pub(super) fn new(
            model: &CpuModel,
            _: super::HwCounterType,
        ) -> Result<Self, Box<dyn Error + Send + Sync>> {
            match *model {}
        }
    }

    impl super::HwCounterRead for Counter {
        type Output = u64;

        #[inline]
        fn read(&self) -> u64 {
            // No `Counter` value exists, so this body is unreachable.
            match *self {}
        }
    }

    impl super::HwCounterRead for (&Counter, &Counter) {
        type Output = (u64, u64);

        #[inline]
        fn read(&self) -> (u64, u64) {
            // No `Counter` value exists, so this body is unreachable.
            match *self.0 {}
        }
    }

    /// Uninhabited placeholder for a detected CPU model.
    pub(super) enum CpuModel {}

    impl CpuModel {
        /// Always fails, listing every unmet platform requirement separated
        /// by `"; "`.
        pub(super) fn detect() -> Result<Self, Box<dyn Error + Send + Sync>> {
            // Never executed; keeps `really_warn!` and `BUG_REPORT_MSG`
            // referenced on this configuration.
            if false {
                really_warn!("unsupported; {}", super::BUG_REPORT_MSG);
            }

            let checks = [
                (
                    cfg!(not(target_arch = "x86_64")),
                    "only supported architecture is x86_64",
                ),
                (cfg!(not(target_os = "linux")), "only supported OS is Linux"),
                (cfg!(target_env = "ohos"), "unsupported OHOS environment"),
            ];
            let reasons: Vec<&str> = checks
                .iter()
                .filter(|(applies, _)| *applies)
                .map(|&(_, reason)| reason)
                .collect();

            Err(reasons.join("; ").into())
        }
    }
}