// perf_event/lib.rs
1//! A performance monitoring API for Linux.
2//!
3//! This crate provides access to processor and kernel counters for things like
4//! instruction completions, cache references and misses, branch predictions,
5//! context switches, page faults, and so on.
6//!
7//! For example, to compare the number of clock cycles elapsed with the number
8//! of instructions completed during one call to `println!`:
9//!
//! ```
//! use perf_event::{Builder, Group};
//! use perf_event::events::Hardware;
//!
//! fn main() -> std::io::Result<()> {
//!     // A `Group` lets us enable and disable several counters atomically.
//!     let mut group = Group::new()?;
//!     let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
//!     let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
//!
//!     let vec = (0..=51).collect::<Vec<_>>();
//!
//!     group.enable()?;
//!     println!("{:?}", vec);
//!     group.disable()?;
//!
//!     let counts = group.read()?;
//!     println!("cycles / instructions: {} / {} ({:.2} cpi)",
//!              counts[&cycles],
//!              counts[&insns],
//!              (counts[&cycles] as f64 / counts[&insns] as f64));
//!
//!     Ok(())
//! }
//! ```
33//!
34//! This crate is built on top of the Linux [`perf_event_open`][man] system
35//! call; that documentation has the authoritative explanations of exactly what
36//! all the counters mean.
37//!
38//! There are two main types for measurement:
39//!
40//! - A [`Counter`] is an individual counter. Use [`Builder`] to
41//! construct one.
42//!
43//! - A [`Group`] is a collection of counters that can be enabled and
44//! disabled atomically, so that they cover exactly the same period of
45//! execution, allowing meaningful comparisons of the individual values.
46//!
47//! If you're familiar with the kernel API already:
48//!
49//! - A `Builder` holds the arguments to a `perf_event_open` call:
50//! a `struct perf_event_attr` and a few other fields.
51//!
52//! - `Counter` and `Group` objects are just event file descriptors, together
53//! with their kernel id numbers, and some other details you need to
54//! actually use them. They're different types because they yield different
55//! types of results, and because you can't retrieve a `Group`'s counts
56//! without knowing how many members it has.
57//!
58//! ### Call for PRs
59//!
60//! Linux's `perf_event_open` API can report all sorts of things this crate
61//! doesn't yet understand: stack traces, logs of executable and shared library
62//! activity, tracepoints, kprobes, uprobes, and so on. And beyond the counters
63//! in the kernel header files, there are others that can only be found at
64//! runtime by consulting `sysfs`, specific to particular processors and
65//! devices. For example, modern Intel processors have counters that measure
66//! power consumption in Joules.
67//!
68//! If you find yourself in need of something this crate doesn't support, please
69//! consider submitting a pull request.
70//!
71//! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
72
73#![deny(missing_docs)]
74
/// A helper macro for silencing warnings when a type is only implemented so
/// that it can be linked in the docs.
macro_rules! used_in_docs {
    ($t:ident) => {
        // An unnamed const item gives us a scope whose name can never collide
        // with anything else in the enclosing module.
        const _: () = {
            // Using a module here means that this macro can accept any identifier that
            // would normally be used in an import statement.
            mod use_item {
                #[allow(unused_imports)]
                use super::$t;
            }
        };
    };
}
89
90use perf_event_open_sys::bindings::perf_event_attr;
91use std::fs::File;
92use std::io::{self, Read};
93use std::os::raw::{c_int, c_uint};
94use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
95
96pub mod events;
97
98#[cfg(feature = "hooks")]
99pub mod hooks;
100
101mod builder;
102mod counter;
103mod flags;
104
105// When the `"hooks"` feature is not enabled, call directly into
106// `perf-event-open-sys`.
107#[cfg(not(feature = "hooks"))]
108use perf_event_open_sys as sys;
109
110// When the `"hooks"` feature is enabled, `sys` functions allow for
111// interposed functions that provide simulated results for testing.
112#[cfg(feature = "hooks")]
113use hooks::sys;
114
115pub use crate::builder::Builder;
116pub use crate::counter::Counter;
117pub use crate::flags::{Clock, ReadFormat, SampleBranchFlag, SampleSkid};
118
119/// A group of counters that can be managed as a unit.
120///
121/// A `Group` represents a group of [`Counter`]s that can be enabled,
122/// disabled, reset, or read as a single atomic operation. This is necessary if
123/// you want to compare counter values, produce ratios, and so on, since those
124/// operations are only meaningful on counters that cover exactly the same
125/// period of execution.
126///
127/// A `Counter` is placed in a group when it is created, by calling the
128/// `Builder`'s [`group`] method. A `Group`'s [`read`] method returns values
129/// of all its member counters at once as a [`Counts`] value, which can be
130/// indexed by `Counter` to retrieve a specific value.
131///
132/// For example, the following program computes the average number of cycles
133/// used per instruction retired for a call to `println!`:
134///
/// ```
/// # fn main() -> std::io::Result<()> {
/// use perf_event::{Builder, Group};
/// use perf_event::events::Hardware;
///
/// let mut group = Group::new()?;
/// let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
/// let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
///
/// let vec = (0..=51).collect::<Vec<_>>();
///
/// group.enable()?;
/// println!("{:?}", vec);
/// group.disable()?;
///
/// let counts = group.read()?;
/// println!("cycles / instructions: {} / {} ({:.2} cpi)",
///          counts[&cycles],
///          counts[&insns],
///          (counts[&cycles] as f64 / counts[&insns] as f64));
/// # Ok(()) }
/// ```
155///
156/// The lifetimes of `Counter`s and `Group`s are independent: placing a
157/// `Counter` in a `Group` does not take ownership of the `Counter`, nor must
158/// the `Counter`s in a group outlive the `Group`. If a `Counter` is dropped, it
159/// is simply removed from its `Group`, and omitted from future results. If a
160/// `Group` is dropped, its individual counters continue to count.
161///
162/// Enabling or disabling a `Group` affects each `Counter` that belongs to it.
163/// Subsequent reads from the `Counter` will not reflect activity while the
164/// `Group` was disabled, unless the `Counter` is re-enabled individually.
165///
166/// A `Group` and its members must all observe the same tasks and cpus; mixing
167/// these makes building the `Counter` return an error. Unfortunately, there is
168/// no way at present to specify a `Group`'s task and cpu, so you can only use
169/// `Group` on the calling task. If this is a problem, please file an issue.
170///
171/// Internally, a `Group` is just a wrapper around an event file descriptor.
172///
173/// ## Limits on group size
174///
175/// Hardware counters are implemented using special-purpose registers on the
176/// processor, of which there are only a fixed number. (For example, an Intel
177/// high-end laptop processor from 2015 has four such registers per virtual
178/// processor.) Without using groups, if you request more hardware counters than
179/// the processor can actually support, a complete count isn't possible, but the
180/// kernel will rotate the processor's real registers amongst the measurements
181/// you've requested to at least produce a sample.
182///
183/// But since the point of a counter group is that its members all cover exactly
184/// the same period of time, this tactic can't be applied to support large
185/// groups. If the kernel cannot schedule a group, its counters remain zero. I
186/// think you can detect this situation by comparing the group's [`time_enabled`]
187/// and [`time_running`] values. It might also be useful to set the `pinned` bit,
188/// which puts the counter in an error state if it's not able to be put on the
189/// CPU; see [#10].
190///
191/// According to the `perf_list(1)` man page, you may be able to free up a
192/// hardware counter by disabling the kernel's NMI watchdog, which reserves one
193/// for detecting kernel hangs:
194///
195/// ```ignore
196/// $ echo 0 > /proc/sys/kernel/nmi_watchdog
197/// ```
198///
199/// You can reenable the watchdog when you're done like this:
200///
201/// ```ignore
202/// $ echo 1 > /proc/sys/kernel/nmi_watchdog
203/// ```
204///
205/// [`group`]: Builder::group
206/// [`read`]: Group::read
207/// [`#5`]: https://github.com/jimblandy/perf-event/issues/5
208/// [`#10`]: https://github.com/jimblandy/perf-event/issues/10
209/// [`time_enabled`]: Counts::time_enabled
210/// [`time_running`]: Counts::time_running
pub struct Group {
    /// The file descriptor for this counter, returned by `perf_event_open`.
    /// This counter itself is for the dummy software event, so it's not
    /// interesting.
    file: File,

    /// The unique id assigned to this group by the kernel. We only use this for
    /// assertions.
    id: u64,

    /// An upper bound on the number of Counters in this group. This lets us
    /// allocate buffers of sufficient size for PERF_FORMAT_GROUP reads.
    ///
    /// There's no way to ask the kernel how many members a group has. And if we
    /// pass a group read a buffer that's too small, the kernel won't just
    /// return a truncated result; it returns ENOSPC and leaves the buffer
    /// untouched. So the buffer just has to be large enough.
    ///
    /// Since we're borrowed while building group members, adding members can
    /// increment this counter. But it's harder to decrement it when a member
    /// gets dropped: we don't require that a Group outlive its members, so they
    /// can't necessarily update their `Group`'s count from a `Drop` impl. So we
    /// just increment, giving us an overestimate, and then correct the count
    /// when we actually do a read.
    ///
    /// This includes the dummy counter for the group itself.
    max_members: usize,
}
239
240/// A collection of counts from a [`Group`] of counters.
241///
242/// This is the type returned by calling [`read`] on a [`Group`].
243/// You can index it with a reference to a specific `Counter`:
244///
/// ```
/// # fn main() -> std::io::Result<()> {
/// # use perf_event::{Builder, Group};
/// # let mut group = Group::new()?;
/// # let cycles = Builder::new().group(&mut group).build()?;
/// # let insns = Builder::new().group(&mut group).build()?;
/// let counts = group.read()?;
/// println!("cycles / instructions: {} / {} ({:.2} cpi)",
///          counts[&cycles],
///          counts[&insns],
///          (counts[&cycles] as f64 / counts[&insns] as f64));
/// # Ok(()) }
/// ```
256///
257/// Or you can iterate over the results it contains:
258///
/// ```
/// # fn main() -> std::io::Result<()> {
/// # use perf_event::Group;
/// # let counts = Group::new()?.read()?;
/// for (id, value) in &counts {
///     println!("Counter id {} has value {}", id, value);
/// }
/// # Ok(()) }
/// ```
266///
267/// The `id` values produced by this iteration are internal identifiers assigned
268/// by the kernel. You can use the [`Counter::id`] method to find a
269/// specific counter's id.
270///
271/// For some kinds of events, the kernel may use timesharing to give all
272/// counters access to scarce hardware registers. You can see how long a group
273/// was actually running versus the entire time it was enabled using the
274/// `time_enabled` and `time_running` methods:
275///
/// ```
/// # fn main() -> std::io::Result<()> {
/// # use perf_event::{Builder, Group};
/// # let mut group = Group::new()?;
/// # let insns = Builder::new().group(&mut group).build()?;
/// # let counts = group.read()?;
/// let scale = counts.time_enabled() as f64 /
///             counts.time_running() as f64;
/// for (id, value) in &counts {
///     print!("Counter id {} has value {}",
///            id, (*value as f64 * scale) as u64);
///     if scale > 1.0 {
///         print!(" (estimated)");
///     }
///     println!();
/// }
///
/// # Ok(()) }
/// ```
293///
294/// [`read`]: Group::read
pub struct Counts {
    // Raw results from the `read`, in the kernel's `PERF_FORMAT_GROUP` layout:
    // data[0] is the number of counters, data[1] and data[2] are the
    // enabled/running times, followed by one (value, id) pair per counter.
    data: Vec<u64>,
}
299
/// The value of a counter, along with timesharing data.
///
/// Some counters are implemented in hardware, and the processor can run
/// only a fixed number of them at a time. If more counters are requested
/// than the hardware can support, the kernel timeshares them on the
/// hardware.
///
/// This struct holds the value of a counter, together with the time it was
/// enabled, and the proportion of that for which it was actually running.
#[repr(C)]
pub struct CountAndTime {
    /// The counter value.
    ///
    /// The meaning of this field depends on how the counter was configured when
    /// it was built; see [`Builder`].
    pub count: u64,

    /// How long this counter was enabled by the program, in nanoseconds.
    pub time_enabled: u64,

    /// How long the kernel actually ran this counter, in nanoseconds.
    ///
    /// If `time_enabled == time_running`, then the counter ran for the entire
    /// period it was enabled, without interruption. Otherwise, the counter
    /// shared the underlying hardware with others, and you should prorate its
    /// value accordingly.
    pub time_running: u64,
}
328
impl Group {
    /// Construct a new, empty `Group`.
    #[allow(unused_parens)]
    pub fn new() -> io::Result<Group> {
        // Open a placeholder perf counter that we can add other events to.
        // The dummy software event never counts anything itself; it exists
        // only to serve as the group leader.
        let mut attrs = perf_event_attr {
            size: std::mem::size_of::<perf_event_attr>() as u32,
            type_: sys::bindings::PERF_TYPE_SOFTWARE,
            config: sys::bindings::PERF_COUNT_SW_DUMMY as u64,
            ..perf_event_attr::default()
        };

        // Start disabled, and count only user-space activity.
        attrs.set_disabled(1);
        attrs.set_exclude_kernel(1);
        attrs.set_exclude_hv(1);

        // Arrange to be able to identify the counters we read back.
        attrs.read_format = (sys::bindings::PERF_FORMAT_TOTAL_TIME_ENABLED
            | sys::bindings::PERF_FORMAT_TOTAL_TIME_RUNNING
            | sys::bindings::PERF_FORMAT_ID
            | sys::bindings::PERF_FORMAT_GROUP) as u64;

        // pid 0, cpu -1: observe the calling process on any CPU.
        // group_fd -1: this event is itself the group leader.
        let file = unsafe {
            File::from_raw_fd(check_errno_syscall(|| {
                sys::perf_event_open(&mut attrs, 0, -1, -1, 0)
            })?)
        };

        // Retrieve the ID the kernel assigned us.
        let mut id = 0_u64;
        check_errno_syscall(|| unsafe { sys::ioctls::ID(file.as_raw_fd(), &mut id) })?;

        Ok(Group {
            file,
            id,
            // The group starts with a single member: its own dummy counter.
            max_members: 1,
        })
    }

    /// Allow all `Counter`s in this `Group` to begin counting their designated
    /// events, as a single atomic operation.
    ///
    /// This does not affect whatever values the `Counter`s had previously; new
    /// events add to the current counts. To clear the `Counter`s, use the
    /// [`reset`] method.
    ///
    /// [`reset`]: #method.reset
    pub fn enable(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::ENABLE)
    }

    /// Make all `Counter`s in this `Group` stop counting their designated
    /// events, as a single atomic operation. Their counts are unaffected.
    pub fn disable(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::DISABLE)
    }

    /// Reset all `Counter`s in this `Group` to zero, as a single atomic operation.
    pub fn reset(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::RESET)
    }

    /// Perform some group ioctl.
    ///
    /// `f` must be a syscall that sets `errno` and returns `-1` on failure.
    fn generic_ioctl(&mut self, f: unsafe fn(c_int, c_uint) -> c_int) -> io::Result<()> {
        // PERF_IOC_FLAG_GROUP asks the kernel to apply the operation to every
        // member of the group, not just the leader.
        check_errno_syscall(|| unsafe {
            f(self.file.as_raw_fd(), sys::bindings::PERF_IOC_FLAG_GROUP)
        })
        .map(|_| ())
    }

    /// Return the values of all the `Counter`s in this `Group` as a [`Counts`]
    /// value.
    ///
    /// A `Counts` value is a map from specific `Counter`s to their values. You
    /// can find a specific `Counter`'s value by indexing:
    ///
    /// ```ignore
    /// let mut group = Group::new()?;
    /// let counter1 = Builder::new().group(&mut group).kind(...).build()?;
    /// let counter2 = Builder::new().group(&mut group).kind(...).build()?;
    /// ...
    /// let counts = group.read()?;
    /// println!("Rhombus inclinations per taxi medallion: {} / {} ({:.0}%)",
    ///          counts[&counter1],
    ///          counts[&counter2],
    ///          (counts[&counter1] as f64 / counts[&counter2] as f64) * 100.0);
    /// ```
    ///
    /// [`Counts`]: struct.Counts.html
    pub fn read(&mut self) -> io::Result<Counts> {
        // Since we passed `PERF_FORMAT_{ID,GROUP,TOTAL_TIME_{ENABLED,RUNNING}}`,
        // the data we'll read has the form:
        //
        //     struct read_format {
        //         u64 nr;            /* The number of events */
        //         u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
        //         u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
        //         struct {
        //             u64 value;     /* The value of the event */
        //             u64 id;        /* if PERF_FORMAT_ID */
        //         } values[nr];
        //     };
        //
        // Size the buffer from `max_members`, which is an overestimate (see its
        // documentation); a too-small buffer would make the kernel fail the
        // read with ENOSPC rather than truncate.
        //
        // NOTE(review): this assert requires the kernel to fill the entire
        // buffer; if a member was dropped since `max_members` was last
        // corrected, the read could presumably be shorter — confirm this
        // cannot fire after a `Counter` drop.
        let mut data = vec![0_u64; 3 + 2 * self.max_members];
        assert_eq!(
            self.file.read(u64::slice_as_bytes_mut(&mut data))?,
            std::mem::size_of_val(&data[..])
        );

        let counts = Counts { data };

        // CountsIter assumes that the group's dummy count appears first.
        assert_eq!(counts.nth_ref(0).0, self.id);

        // Does the kernel ever return nonsense?
        assert!(counts.time_running() <= counts.time_enabled());

        // Update `max_members` for the next read.
        self.max_members = counts.len();

        Ok(counts)
    }
}
453
454impl std::fmt::Debug for Group {
455 fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
456 write!(
457 fmt,
458 "Group {{ fd: {}, id: {} }}",
459 self.file.as_raw_fd(),
460 self.id
461 )
462 }
463}
464
impl AsRawFd for Group {
    /// Return the file descriptor of the group's dummy leader event, without
    /// giving up ownership of it.
    fn as_raw_fd(&self) -> RawFd {
        self.file.as_raw_fd()
    }
}
470
impl IntoRawFd for Group {
    /// Consume the `Group` and return its file descriptor without closing it;
    /// the caller becomes responsible for eventually closing the fd.
    fn into_raw_fd(self) -> RawFd {
        self.file.into_raw_fd()
    }
}
476
477impl Counts {
478 /// Return the number of counters this `Counts` holds results for.
479 #[allow(clippy::len_without_is_empty)] // Groups are never empty.
480 pub fn len(&self) -> usize {
481 self.data[0] as usize
482 }
483
484 /// Return the number of nanoseconds the `Group` was enabled that
485 /// contributed to this `Counts`' contents.
486 pub fn time_enabled(&self) -> u64 {
487 self.data[1]
488 }
489
490 /// Return the number of nanoseconds the `Group` was actually collecting
491 /// counts that contributed to this `Counts`' contents.
492 pub fn time_running(&self) -> u64 {
493 self.data[2]
494 }
495
496 /// Return a range of indexes covering the count and id of the `n`'th counter.
497 fn nth_index(n: usize) -> std::ops::Range<usize> {
498 let base = 3 + 2 * n;
499 base..base + 2
500 }
501
502 /// Return the id and count of the `n`'th counter. This returns a reference
503 /// to the count, for use by the `Index` implementation.
504 fn nth_ref(&self, n: usize) -> (u64, &u64) {
505 let id_val = &self.data[Counts::nth_index(n)];
506
507 // (id, &value)
508 (id_val[1], &id_val[0])
509 }
510}
511
/// An iterator over the counter values in a [`Counts`], returned by
/// [`Group::read`].
///
/// Each item is a pair `(id, &value)`, where `id` is the number assigned to the
/// counter by the kernel (see `Counter::id`), and `value` is that counter's
/// value.
///
/// [`Counts`]: struct.Counts.html
/// [`Counter::id`]: struct.Counter.html#method.id
/// [`Group::read`]: struct.Group.html#method.read
pub struct CountsIter<'c> {
    // The `Counts` value we're iterating over.
    counts: &'c Counts,
    // The index of the next counter to yield.
    next: usize,
}
526
527impl<'c> Iterator for CountsIter<'c> {
528 type Item = (u64, &'c u64);
529 fn next(&mut self) -> Option<(u64, &'c u64)> {
530 if self.next >= self.counts.len() {
531 return None;
532 }
533 let result = self.counts.nth_ref(self.next);
534 self.next += 1;
535 Some(result)
536 }
537}
538
539impl<'c> IntoIterator for &'c Counts {
540 type Item = (u64, &'c u64);
541 type IntoIter = CountsIter<'c>;
542 fn into_iter(self) -> CountsIter<'c> {
543 CountsIter {
544 counts: self,
545 next: 1, // skip the `Group` itself, it's just a dummy.
546 }
547 }
548}
549
550impl Counts {
551 /// Return the value recorded for `member` in `self`, or `None` if `member`
552 /// is not present.
553 ///
554 /// If you know that `member` is in the group, you can simply index:
555 ///
556 /// # fn main() -> std::io::Result<()> {
557 /// # use perf_event::{Builder, Group};
558 /// # let mut group = Group::new()?;
559 /// # let cycle_counter = Builder::new().group(&mut group).build()?;
560 /// # let counts = group.read()?;
561 /// let cycles = counts[&cycle_counter];
562 /// # Ok(()) }
563 pub fn get(&self, member: &Counter) -> Option<&u64> {
564 self.into_iter()
565 .find(|&(id, _)| id == member.id())
566 .map(|(_, value)| value)
567 }
568
569 /// Return an iterator over the counts in `self`.
570 ///
571 /// # fn main() -> std::io::Result<()> {
572 /// # use perf_event::Group;
573 /// # let counts = Group::new()?.read()?;
574 /// for (id, value) in &counts {
575 /// println!("Counter id {} has value {}", id, value);
576 /// }
577 /// # Ok(()) }
578 ///
579 /// Each item is a pair `(id, &value)`, where `id` is the number assigned to
580 /// the counter by the kernel (see `Counter::id`), and `value` is that
581 /// counter's value.
582 pub fn iter<'a>(&'a self) -> CountsIter<'a> {
583 <&'a Counts as IntoIterator>::into_iter(self)
584 }
585}
586
impl std::ops::Index<&Counter> for Counts {
    type Output = u64;
    /// Return `index`'s value. Panics if `index` is not a member of this
    /// group; use [`Counts::get`] for a non-panicking lookup.
    fn index(&self, index: &Counter) -> &u64 {
        self.get(index).unwrap()
    }
}
593
594impl std::fmt::Debug for Counts {
595 fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
596 fmt.debug_map().entries(self.into_iter()).finish()
597 }
598}
599
/// A type whose values can be safely accessed as a slice of bytes.
///
/// # Safety
///
/// `Self` must be a type such that storing a value in memory
/// initializes all the bytes of that memory, so that
/// `slice_as_bytes_mut` can never expose uninitialized bytes to the
/// caller.
unsafe trait SliceAsBytesMut: Sized {
    /// View `slice`'s elements as one mutable byte buffer, suitable for
    /// passing to `Read::read`.
    fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
        // SAFETY: the pointer and length describe exactly the memory of
        // `slice`, and implementors guarantee (per the trait's safety
        // contract) that every byte of a stored `Self` is initialized.
        unsafe {
            std::slice::from_raw_parts_mut(
                slice.as_mut_ptr() as *mut u8,
                std::mem::size_of_val(slice),
            )
        }
    }
}
618
// SAFETY: storing a `u64` initializes all eight of its bytes, so exposing
// them as `&mut [u8]` can never reveal uninitialized memory.
unsafe impl SliceAsBytesMut for u64 {}
620
/// Produce an `io::Result` from an errno-style system call.
///
/// An 'errno-style' system call is one that reports failure by returning -1 and
/// setting the C `errno` value when an error occurs.
fn check_errno_syscall<F, R>(f: F) -> io::Result<R>
where
    F: FnOnce() -> R,
    R: PartialOrd + Default,
{
    match f() {
        // A negative return value signals failure; fetch the error from errno.
        result if result < R::default() => Err(io::Error::last_os_error()),
        result => Ok(result),
    }
}
637
// Smoke test: the default `Builder` configuration should produce a working
// `Counter` wherever perf events are available.
#[test]
fn simple_build() {
    Builder::new()
        .build()
        .expect("Couldn't build default Counter");
}
644
#[test]
#[cfg(target_os = "linux")]
fn test_error_code_is_correct() {
    // This configuration should always result in EINVAL
    let builder = Builder::new()
        // CPU_CLOCK is literally always supported so we don't have to worry
        // about test failures when in VMs.
        .kind(events::Software::CPU_CLOCK)
        // There should _hopefully_ never be a system with this many CPUs.
        .one_cpu(i32::MAX as usize);

    match builder.build() {
        Ok(_) => panic!("counter construction was not supposed to succeed"),
        // The out-of-range CPU index should be rejected with EINVAL, and
        // `raw_os_error` should surface that errno value unchanged.
        Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EINVAL)),
    }
}