criterion_linux_perf/
lib.rs

1//! This is a measurement plugin for
2//! [Criterion.rs](https://bheisler.github.io/criterion.rs/book/index.html)
3//! that provides measurements using Linux's perf interface.
4//!
5//! # Example
6//!
7//! ```
8//! use criterion::{criterion_group, criterion_main, Criterion};
9//! use criterion_linux_perf::{PerfMeasurement, PerfMode};
10//!
11//! fn timeit(crit: &mut Criterion<PerfMeasurement>) {
12//!     crit.bench_function("String::new", |b| b.iter(|| String::new()));
13//!     crit.bench_function("String::from", |b| b.iter(|| String::from("")));
14//! }
15//!
16//! criterion_group!(
17//!     name = benches;
18//!     config = Criterion::default().with_measurement(
19//!         PerfMeasurement::new(PerfMode::Branches),
20//!     );
21//!     targets = timeit
22//! );
23//! criterion_main!(benches);
24//! ```
25
26#![deny(missing_docs)]
27
28use criterion::{
29    measurement::{Measurement, ValueFormatter},
30    Throughput,
31};
32use perf_event::{
33    events::{Event, Hardware},
34    Counter,
35};
36
// Generates the private `PerfMode` helpers from a table of
// `Variant = event => "unit",` rows (every row ends with a comma).
macro_rules! perf_mode {
    ( $( $variant:ident = $source:expr => $label:literal, )* ) => {
        impl PerfMode {
            // Converts the table expression for this variant into an `Event`.
            fn event(&self) -> Event {
                match *self {
                    $( PerfMode::$variant => $source.into(), )*
                }
            }

            // Returns the labels as (plain, per-byte, per-element).
            fn units(&self) -> (&'static str, &'static str, &'static str) {
                match *self {
                    $(
                        PerfMode::$variant => {
                            let per_byte = concat!($label, "/byte");
                            let per_element = concat!($label, "/element");
                            ($label, per_byte, per_element)
                        }
                    )*
                }
            }
        }
    };
}
58
/// The perf counter to measure when running a benchmark.
///
/// Each variant selects one hardware event and the unit label used when
/// reporting its values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PerfMode {
    /// The number of instructions retired. These can be affected by
    /// various issues, most notably hardware interrupt counts.
    ///
    /// Uses `Hardware::INSTRUCTIONS`; reported as `instructions`.
    Instructions,
    /// The total number of CPU cycles. This can be affected by CPU
    /// frequency scaling.
    ///
    /// Uses `Hardware::CPU_CYCLES`; reported as `cycles`.
    Cycles,
    /// The number of branch instructions retired.
    ///
    /// Uses `Hardware::BRANCH_INSTRUCTIONS`; reported as `branches`.
    Branches,
    /// The number of mispredicted branches.
    ///
    /// Uses `Hardware::BRANCH_MISSES`; reported as `branch misses`.
    BranchMisses,
    /// The number of cache accesses.
    ///
    /// Uses `Hardware::CACHE_REFERENCES`; reported as `cache refs`.
    CacheRefs,
    /// The number of cache misses.
    ///
    /// Uses `Hardware::CACHE_MISSES`; reported as `cache misses`.
    CacheMisses,
    /// The number of bus cycles elapsed.
    ///
    /// Uses `Hardware::BUS_CYCLES`; reported as `bus cycles`.
    BusCycles,
    /// The total number of CPU cycles elapsed. This is not affected by
    /// CPU frequency scaling.
    ///
    /// Uses `Hardware::REF_CPU_CYCLES`; reported as `cycles`, the same
    /// label as [`PerfMode::Cycles`].
    RefCycles,
}
82
// Event/unit table for `PerfMode`. Each row reads
// `Variant = hardware event => unit label`; the macro derives the
// "<unit>/byte" and "<unit>/element" throughput labels from the unit label.
// Note that `Cycles` and `RefCycles` deliberately or not share the label
// "cycles", so their reports are not distinguishable by unit alone.
perf_mode! {
    Instructions = Hardware::INSTRUCTIONS => "instructions",
    Cycles = Hardware::CPU_CYCLES => "cycles",
    Branches = Hardware::BRANCH_INSTRUCTIONS => "branches",
    BranchMisses = Hardware::BRANCH_MISSES => "branch misses",
    CacheRefs = Hardware::CACHE_REFERENCES => "cache refs",
    CacheMisses = Hardware::CACHE_MISSES => "cache misses",
    BusCycles = Hardware::BUS_CYCLES => "bus cycles",
    RefCycles = Hardware::REF_CPU_CYCLES => "cycles",
}
93
/// The measurement type to be used with `Criterion::with_measurement()`.
///
/// The default measurement created by `PerfMeasurement::default()` is
/// [`PerfMode::Instructions`].
#[derive(Clone)]
pub struct PerfMeasurement {
    // The perf event a counter is built for each time a measurement starts.
    event: Event,
    // Supplies the unit labels returned through `Measurement::formatter`.
    formatter: PerfFormatter,
}
103
104impl Default for PerfMeasurement {
105    fn default() -> Self {
106        Self::new(PerfMode::Instructions)
107    }
108}
109
110impl PerfMeasurement {
111    /// Create a new measurement, using the given [`PerfMode`] event.
112    pub fn new(mode: PerfMode) -> Self {
113        let units = mode.units();
114        let event = mode.event();
115        let formatter = PerfFormatter {
116            units: units.0,
117            throughput_bytes: units.1,
118            throughput_elements: units.2,
119        };
120        Self { event, formatter }
121    }
122}
123
124impl Measurement for PerfMeasurement {
125    type Intermediate = Counter;
126    type Value = u64;
127
128    fn start(&self) -> Self::Intermediate {
129        let mut counter = perf_event::Builder::new()
130            .kind(self.event.clone())
131            .build()
132            .unwrap();
133        counter.enable().unwrap();
134        counter
135    }
136
137    fn end(&self, mut counter: Self::Intermediate) -> Self::Value {
138        counter.disable().unwrap();
139        counter.read().unwrap()
140    }
141
142    fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
143        v1 + v2
144    }
145
146    fn zero(&self) -> Self::Value {
147        0
148    }
149
150    fn to_f64(&self, val: &Self::Value) -> f64 {
151        *val as f64
152    }
153
154    fn formatter(&self) -> &dyn ValueFormatter {
155        &self.formatter
156    }
157}
158
/// Unit labels for one perf event, returned to Criterion through the
/// `ValueFormatter` implementation.
#[derive(Clone)]
struct PerfFormatter {
    /// Label for raw counts, e.g. `"cycles"`.
    units: &'static str,
    /// Label for per-byte throughput, e.g. `"cycles/byte"`.
    throughput_bytes: &'static str,
    /// Label for per-element throughput, e.g. `"cycles/element"`.
    throughput_elements: &'static str,
}
165
166impl ValueFormatter for PerfFormatter {
167    fn scale_values(&self, _typical_value: f64, _values: &mut [f64]) -> &'static str {
168        self.units
169    }
170
171    fn scale_throughputs(
172        &self,
173        _typical_value: f64,
174        throughput: &Throughput,
175        values: &mut [f64],
176    ) -> &'static str {
177        match throughput {
178            Throughput::Bytes(n) => {
179                for val in values {
180                    *val /= *n as f64;
181                }
182                self.throughput_bytes
183            }
184            Throughput::Elements(n) => {
185                for val in values {
186                    *val /= *n as f64;
187                }
188                self.throughput_elements
189            }
190        }
191    }
192
193    fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str {
194        self.units
195    }
196}