perf_event/lib.rs
//! A performance monitoring API for Linux.
//!
//! This crate provides access to processor and kernel counters for things like
//! instruction completions, cache references and misses, branch predictions,
//! context switches, page faults, and so on.
//!
//! For example, to compare the number of clock cycles elapsed with the number
//! of instructions completed during one call to `println!`:
//!
//!     use perf_event::{Builder, Group};
//!     use perf_event::events::Hardware;
//!
//!     fn main() -> std::io::Result<()> {
//!         // A `Group` lets us enable and disable several counters atomically.
//!         let mut group = Group::new()?;
//!         let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
//!         let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
//!
//!         let vec = (0..=51).collect::<Vec<_>>();
//!
//!         group.enable()?;
//!         println!("{:?}", vec);
//!         group.disable()?;
//!
//!         let counts = group.read()?;
//!         println!("cycles / instructions: {} / {} ({:.2} cpi)",
//!                  counts[&cycles],
//!                  counts[&insns],
//!                  (counts[&cycles] as f64 / counts[&insns] as f64));
//!
//!         Ok(())
//!     }
//!
//! This crate is built on top of the Linux [`perf_event_open`][man] system
//! call; that documentation has the authoritative explanations of exactly what
//! all the counters mean.
//!
//! There are two main types for measurement:
//!
//! - A [`Counter`] is an individual counter. Use [`Builder`] to
//!   construct one.
//!
//! - A [`Group`] is a collection of counters that can be enabled and
//!   disabled atomically, so that they cover exactly the same period of
//!   execution, allowing meaningful comparisons of the individual values.
//!
//! If you're familiar with the kernel API already:
//!
//! - A `Builder` holds the arguments to a `perf_event_open` call:
//!   a `struct perf_event_attr` and a few other fields.
//!
//! - `Counter` and `Group` objects are just event file descriptors, together
//!   with their kernel id numbers, and some other details you need to
//!   actually use them. They're different types because they yield different
//!   types of results, and because you can't retrieve a `Group`'s counts
//!   without knowing how many members it has.
//!
//! ### Call for PRs
//!
//! Linux's `perf_event_open` API can report all sorts of things this crate
//! doesn't yet understand: stack traces, logs of executable and shared library
//! activity, tracepoints, kprobes, uprobes, and so on. And beyond the counters
//! in the kernel header files, there are others that can only be found at
//! runtime by consulting `sysfs`, specific to particular processors and
//! devices. For example, modern Intel processors have counters that measure
//! power consumption in Joules.
//!
//! If you find yourself in need of something this crate doesn't support, please
//! consider submitting a pull request.
//!
//! [man]: http://man7.org/linux/man-pages/man2/perf_event_open.2.html

#![deny(missing_docs)]

use events::Event;
use libc::pid_t;
use perf_event_open_sys::bindings::perf_event_attr;
use std::fs::File;
use std::io::{self, Read};
use std::os::raw::{c_int, c_uint, c_ulong};
use std::os::unix::io::{AsRawFd, FromRawFd};

pub mod events;

#[cfg(feature = "hooks")]
pub mod hooks;

// When the `"hooks"` feature is not enabled, call directly into
// `perf-event-open-sys`.
#[cfg(not(feature = "hooks"))]
use perf_event_open_sys as sys;

// When the `"hooks"` feature is enabled, `sys` functions allow for
// interposed functions that provide simulated results for testing.
#[cfg(feature = "hooks")]
use hooks::sys;

/// A counter for one kind of kernel or hardware event.
///
/// A `Counter` represents a single performance monitoring counter. You select
/// what sort of event you'd like to count when the `Counter` is created, then
/// you can enable and disable the counter, call its [`read`] method to
/// retrieve the current count, and reset it to zero.
///
/// A `Counter`'s value is always a `u64`.
///
/// For example, this counts the number of instructions retired (completed)
/// during a call to `println!`.
///
///     use perf_event::Builder;
///
///     fn main() -> std::io::Result<()> {
///         let mut counter = Builder::new().build()?;
///
///         let vec = (0..=51).collect::<Vec<_>>();
///
///         counter.enable()?;
///         println!("{:?}", vec);
///         counter.disable()?;
///
///         println!("{} instructions retired", counter.read()?);
///
///         Ok(())
///     }
///
/// It is often useful to count several different quantities over the same
/// period of time. For example, if you want to measure the average number of
/// clock cycles used per instruction, you must count both clock cycles and
/// instructions retired, for the same range of execution. The [`Group`] type
/// lets you enable, disable, read, and reset any number of counters
/// simultaneously.
///
/// When a counter is dropped, its kernel resources are freed along with it.
///
/// Internally, a `Counter` is just a wrapper around an event file descriptor.
///
/// [`read`]: Counter::read
pub struct Counter {
    /// The file descriptor for this counter, returned by `perf_event_open`.
    ///
    /// When a `Counter` is dropped, this `File` is dropped, and the kernel
    /// removes the counter from any group it belongs to.
    file: File,

    /// The unique id assigned to this counter by the kernel.
    id: u64,
}

/// A builder for [`Counter`]s.
///
/// There are dozens of parameters that influence a `Counter`'s behavior.
/// `Builder` lets you construct a `Counter` by specifying only those parameters
/// for which you don't want the default value.
///
/// A freshly built `Counter` is disabled. To begin counting events, you must
/// call [`enable`] on the `Counter` or the `Group` to which it belongs.
///
/// For example, if you want a `Counter` for instructions retired by the current
/// process, those are `Builder`'s defaults, so you need only write:
///
///     # use perf_event::Builder;
///     # fn main() -> std::io::Result<()> {
///     let mut insns = Builder::new().build()?;
///     # Ok(()) }
///
/// The [`kind`] method lets you specify what sort of event you want to
/// count. So if you'd rather count branch instructions:
///
///     # use perf_event::Builder;
///     # use perf_event::events::Hardware;
///     # fn main() -> std::io::Result<()> {
///     let mut insns = Builder::new()
///         .kind(Hardware::BRANCH_INSTRUCTIONS)
///         .build()?;
///     # Ok(()) }
///
/// The [`group`] method lets you gather individual counters into a `Group`
/// that can be enabled or disabled atomically:
///
///     # use perf_event::{Builder, Group};
///     # use perf_event::events::Hardware;
///     # fn main() -> std::io::Result<()> {
///     let mut group = Group::new()?;
///     let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
///     let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
///     # Ok(()) }
///
/// Other methods let you select:
///
/// - specific processes or cgroups to observe
/// - specific CPU cores to observe
///
/// `Builder` supports only a fraction of the many knobs and dials Linux offers,
/// but hopefully it will acquire methods to support more of them as time goes
/// on.
///
/// Internally, a `Builder` is just a wrapper around the kernel's `struct
/// perf_event_attr` type.
///
/// [`enable`]: Counter::enable
/// [`kind`]: Builder::kind
/// [`group`]: Builder::group
pub struct Builder<'a> {
    attrs: perf_event_attr,
    who: EventPid<'a>,
    cpu: Option<usize>,
    group: Option<&'a mut Group>,
}

#[derive(Debug)]
enum EventPid<'a> {
    /// Monitor the calling process.
    ThisProcess,

    /// Monitor the given pid.
    Other(pid_t),

    /// Monitor members of the given cgroup.
    CGroup(&'a File),
}

/// A group of counters that can be managed as a unit.
///
/// A `Group` represents a group of [`Counter`]s that can be enabled,
/// disabled, reset, or read as a single atomic operation. This is necessary if
/// you want to compare counter values, produce ratios, and so on, since those
/// operations are only meaningful on counters that cover exactly the same
/// period of execution.
///
/// A `Counter` is placed in a group when it is created, by calling the
/// `Builder`'s [`group`] method. A `Group`'s [`read`] method returns values
/// of all its member counters at once as a [`Counts`] value, which can be
/// indexed by `Counter` to retrieve a specific value.
///
/// For example, the following program computes the average number of cycles
/// used per instruction retired for a call to `println!`:
///
///     # fn main() -> std::io::Result<()> {
///     use perf_event::{Builder, Group};
///     use perf_event::events::Hardware;
///
///     let mut group = Group::new()?;
///     let cycles = Builder::new().group(&mut group).kind(Hardware::CPU_CYCLES).build()?;
///     let insns = Builder::new().group(&mut group).kind(Hardware::INSTRUCTIONS).build()?;
///
///     let vec = (0..=51).collect::<Vec<_>>();
///
///     group.enable()?;
///     println!("{:?}", vec);
///     group.disable()?;
///
///     let counts = group.read()?;
///     println!("cycles / instructions: {} / {} ({:.2} cpi)",
///              counts[&cycles],
///              counts[&insns],
///              (counts[&cycles] as f64 / counts[&insns] as f64));
///     # Ok(()) }
///
/// The lifetimes of `Counter`s and `Group`s are independent: placing a
/// `Counter` in a `Group` does not take ownership of the `Counter`, nor must
/// the `Counter`s in a group outlive the `Group`. If a `Counter` is dropped, it
/// is simply removed from its `Group`, and omitted from future results. If a
/// `Group` is dropped, its individual counters continue to count.
///
/// Enabling or disabling a `Group` affects each `Counter` that belongs to it.
/// Subsequent reads from the `Counter` will not reflect activity while the
/// `Group` was disabled, unless the `Counter` is re-enabled individually.
///
/// A `Group` and its members must all observe the same tasks and CPUs; mixing
/// them causes `Counter` construction to fail with an error. Unfortunately,
/// there is no way at present to specify a `Group`'s task and cpu, so you can
/// only use `Group` on the calling task. If this is a problem, please file an
/// issue.
///
/// Internally, a `Group` is just a wrapper around an event file descriptor.
///
/// ## Limits on group size
///
/// Hardware counters are implemented using special-purpose registers on the
/// processor, of which there are only a fixed number. (For example, a high-end
/// Intel laptop processor from 2015 has four such registers per virtual
/// processor.) Without using groups, if you request more hardware counters than
/// the processor can actually support, a complete count isn't possible, but the
/// kernel will rotate the processor's real registers amongst the measurements
/// you've requested to at least produce a sample.
///
/// But since the point of a counter group is that its members all cover exactly
/// the same period of time, this tactic can't be applied to support large
/// groups. If the kernel cannot schedule a group, its counters remain zero. I
/// think you can detect this situation by comparing the group's [`time_enabled`]
/// and [`time_running`] values. It might also be useful to set the `pinned` bit,
/// which puts the counter in an error state if it's not able to be put on the
/// CPU; see [#10].
///
/// According to the `perf-list(1)` man page, you may be able to free up a
/// hardware counter by disabling the kernel's NMI watchdog, which reserves one
/// for detecting kernel hangs:
///
/// ```ignore
/// $ echo 0 > /proc/sys/kernel/nmi_watchdog
/// ```
///
/// You can reenable the watchdog when you're done like this:
///
/// ```ignore
/// $ echo 1 > /proc/sys/kernel/nmi_watchdog
/// ```
///
/// [`group`]: Builder::group
/// [`read`]: Group::read
/// [#5]: https://github.com/jimblandy/perf-event/issues/5
/// [#10]: https://github.com/jimblandy/perf-event/issues/10
/// [`time_enabled`]: Counts::time_enabled
/// [`time_running`]: Counts::time_running
pub struct Group {
    /// The file descriptor for this counter, returned by `perf_event_open`.
    /// This counter itself is for the dummy software event, so it's not
    /// interesting.
    file: File,

    /// The unique id assigned to this group by the kernel. We only use this for
    /// assertions.
    id: u64,

    /// An upper bound on the number of Counters in this group. This lets us
    /// allocate buffers of sufficient size for PERF_FORMAT_GROUP reads.
    ///
    /// There's no way to ask the kernel how many members a group has. And if we
    /// pass a group read a buffer that's too small, the kernel won't just
    /// return a truncated result; it returns ENOSPC and leaves the buffer
    /// untouched. So the buffer just has to be large enough.
    ///
    /// Since we're borrowed while building group members, adding members can
    /// increment this counter. But it's harder to decrement it when a member
    /// gets dropped: we don't require that a Group outlive its members, so they
    /// can't necessarily update their `Group`'s count from a `Drop` impl. So we
    /// just increment, giving us an overestimate, and then correct the count
    /// when we actually do a read.
    ///
    /// This includes the dummy counter for the group itself.
    max_members: usize,
}

/// A collection of counts from a [`Group`] of counters.
///
/// This is the type returned by calling [`read`] on a [`Group`].
/// You can index it with a reference to a specific `Counter`:
///
///     # fn main() -> std::io::Result<()> {
///     # use perf_event::{Builder, Group};
///     # let mut group = Group::new()?;
///     # let cycles = Builder::new().group(&mut group).build()?;
///     # let insns = Builder::new().group(&mut group).build()?;
///     let counts = group.read()?;
///     println!("cycles / instructions: {} / {} ({:.2} cpi)",
///              counts[&cycles],
///              counts[&insns],
///              (counts[&cycles] as f64 / counts[&insns] as f64));
///     # Ok(()) }
///
/// Or you can iterate over the results it contains:
///
///     # fn main() -> std::io::Result<()> {
///     # use perf_event::Group;
///     # let counts = Group::new()?.read()?;
///     for (id, value) in &counts {
///         println!("Counter id {} has value {}", id, value);
///     }
///     # Ok(()) }
///
/// The `id` values produced by this iteration are internal identifiers assigned
/// by the kernel. You can use the [`Counter::id`] method to find a
/// specific counter's id.
///
/// For some kinds of events, the kernel may use timesharing to give all
/// counters access to scarce hardware registers. You can see how long a group
/// was actually running versus the entire time it was enabled using the
/// `time_enabled` and `time_running` methods:
///
///     # fn main() -> std::io::Result<()> {
///     # use perf_event::{Builder, Group};
///     # let mut group = Group::new()?;
///     # let insns = Builder::new().group(&mut group).build()?;
///     # let counts = group.read()?;
///     let scale = counts.time_enabled() as f64 /
///                 counts.time_running() as f64;
///     for (id, value) in &counts {
///         print!("Counter id {} has value {}",
///                id, (*value as f64 * scale) as u64);
///         if scale > 1.0 {
///             print!(" (estimated)");
///         }
///         println!();
///     }
///     # Ok(()) }
///
/// [`read`]: Group::read
pub struct Counts {
    // Raw results from the `read`.
    data: Vec<u64>,
}

/// The value of a counter, along with timesharing data.
///
/// Some counters are implemented in hardware, and the processor can run
/// only a fixed number of them at a time. If more counters are requested
/// than the hardware can support, the kernel timeshares them on the
/// hardware.
///
/// This struct holds the value of a counter, together with the time it was
/// enabled, and the proportion of that for which it was actually running.
#[repr(C)]
pub struct CountAndTime {
    /// The counter value.
    ///
    /// The meaning of this field depends on how the counter was configured when
    /// it was built; see [`Builder`].
    pub count: u64,

    /// How long this counter was enabled by the program, in nanoseconds.
    pub time_enabled: u64,

    /// How long the kernel actually ran this counter, in nanoseconds.
    ///
    /// If `time_enabled == time_running`, then the counter ran for the entire
    /// period it was enabled, without interruption. Otherwise, the counter
    /// shared the underlying hardware with others, and you should prorate its
    /// value accordingly.
    pub time_running: u64,
}

impl<'a> EventPid<'a> {
    // Return the `pid` arg and the `flags` bits representing `self`.
    fn as_args(&self) -> (pid_t, u32) {
        match self {
            EventPid::ThisProcess => (0, 0),
            EventPid::Other(pid) => (*pid, 0),
            EventPid::CGroup(file) => (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP),
        }
    }
}

impl<'a> Default for Builder<'a> {
    fn default() -> Builder<'a> {
        let mut attrs = perf_event_attr {
            // Setting `size` accurately will not prevent the code from working
            // on older kernels. The module comments for `perf_event_open_sys`
            // explain why in far too much detail.
            size: std::mem::size_of::<perf_event_attr>() as u32,
            ..perf_event_attr::default()
        };

        attrs.set_disabled(1);
        attrs.set_exclude_kernel(1); // don't count time in kernel
        attrs.set_exclude_hv(1); // don't count time in hypervisor

        // Request data for `time_enabled` and `time_running`.
        attrs.read_format |= sys::bindings::PERF_FORMAT_TOTAL_TIME_ENABLED as u64
            | sys::bindings::PERF_FORMAT_TOTAL_TIME_RUNNING as u64;

        let kind = Event::Hardware(events::Hardware::INSTRUCTIONS);
        attrs.type_ = kind.r#type();
        attrs.config = kind.config();

        Builder {
            attrs,
            who: EventPid::ThisProcess,
            cpu: None,
            group: None,
        }
    }
}

impl<'a> Builder<'a> {
    /// Return a new `Builder`, with all parameters set to their defaults.
    pub fn new() -> Builder<'a> {
        Builder::default()
    }

    /// Observe the calling process. (This is the default.)
    pub fn observe_self(mut self) -> Builder<'a> {
        self.who = EventPid::ThisProcess;
        self
    }

    /// Observe the process with the given process id. This requires
    /// [`CAP_SYS_PTRACE`][man-capabilities] capabilities.
    ///
    /// [man-capabilities]: http://man7.org/linux/man-pages/man7/capabilities.7.html
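    ///
    /// For example, a sketch of observing a child process spawned by this
    /// program (`true` is just a convenient no-op command; note that the child
    /// starts running before the counter is built, so its earliest activity is
    /// missed):
    ///
    /// ```no_run
    /// # use perf_event::Builder;
    /// # fn main() -> std::io::Result<()> {
    /// let child = std::process::Command::new("true").spawn()?;
    /// let _counter = Builder::new().observe_pid(child.id() as i32).build()?;
    /// # Ok(()) }
    /// ```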
    pub fn observe_pid(mut self, pid: pid_t) -> Builder<'a> {
        self.who = EventPid::Other(pid);
        self
    }

    /// Observe code running in the given [cgroup][man-cgroups] (container). The
    /// `cgroup` argument should be a `File` referring to the cgroup's directory
    /// in the cgroupfs filesystem.
    ///
    /// [man-cgroups]: http://man7.org/linux/man-pages/man7/cgroups.7.html
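    ///
    /// For example, a sketch of observing an existing cgroup; the path below is
    /// illustrative, so substitute wherever your cgroup actually lives in the
    /// cgroupfs mount. Cgroup monitoring is system-wide, so a specific CPU is
    /// selected as well (you would repeat this for each CPU of interest):
    ///
    /// ```no_run
    /// # use perf_event::Builder;
    /// # use std::fs::File;
    /// # fn main() -> std::io::Result<()> {
    /// let cgroup = File::open("/sys/fs/cgroup/my-service")?;
    /// let _counter = Builder::new()
    ///     .observe_cgroup(&cgroup)
    ///     .one_cpu(0)
    ///     .build()?;
    /// # Ok(()) }
    /// ```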
    pub fn observe_cgroup(mut self, cgroup: &'a File) -> Builder<'a> {
        self.who = EventPid::CGroup(cgroup);
        self
    }

    /// Observe only code running on the given CPU core.
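    ///
    /// For example, a sketch of a counter that, with the default target of the
    /// calling process, counts only while that process is running on CPU 0:
    ///
    /// ```
    /// # use perf_event::Builder;
    /// # fn main() -> std::io::Result<()> {
    /// let _insns_on_cpu0 = Builder::new().one_cpu(0).build()?;
    /// # Ok(()) }
    /// ```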
    pub fn one_cpu(mut self, cpu: usize) -> Builder<'a> {
        self.cpu = Some(cpu);
        self
    }

    /// Observe code running on any CPU core. (This is the default.)
    pub fn any_cpu(mut self) -> Builder<'a> {
        self.cpu = None;
        self
    }

    /// Set whether this counter is inherited by new threads.
    ///
    /// When this flag is set, this counter observes activity in new threads
    /// created by any thread already being observed.
    ///
    /// By default, the flag is unset: counters are not inherited, and observe
    /// only the threads specified when they are created.
    ///
    /// This flag cannot be set if the counter belongs to a `Group`. Doing so
    /// will result in an error when the counter is built. This is a kernel
    /// limitation.
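    ///
    /// For example, a sketch of a counter that also observes work done on a
    /// thread spawned while the counter exists:
    ///
    /// ```
    /// # use perf_event::Builder;
    /// # fn main() -> std::io::Result<()> {
    /// let mut counter = Builder::new().inherit(true).build()?;
    /// counter.enable()?;
    /// std::thread::spawn(|| {
    ///     // Instructions executed here are counted as well, because the
    ///     // new thread inherits the counter from the spawning thread.
    /// })
    /// .join()
    /// .unwrap();
    /// counter.disable()?;
    /// # Ok(()) }
    /// ```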
    pub fn inherit(mut self, inherit: bool) -> Builder<'a> {
        let flag = if inherit { 1 } else { 0 };
        self.attrs.set_inherit(flag);
        self
    }

    /// Count events of the given kind. This accepts an [`Event`] value,
    /// or any type that can be converted to one, so you can pass [`Hardware`],
    /// [`Software`] and [`Cache`] values directly.
    ///
    /// The default is to count retired instructions, or
    /// `Hardware::INSTRUCTIONS` events.
    ///
    /// For example, to count level 1 data cache references and misses, pass the
    /// appropriate `events::Cache` values:
    ///
    ///     # fn main() -> std::io::Result<()> {
    ///     use perf_event::{Builder, Group};
    ///     use perf_event::events::{Cache, CacheOp, CacheResult, WhichCache};
    ///
    ///     const ACCESS: Cache = Cache {
    ///         which: WhichCache::L1D,
    ///         operation: CacheOp::READ,
    ///         result: CacheResult::ACCESS,
    ///     };
    ///     const MISS: Cache = Cache { result: CacheResult::MISS, ..ACCESS };
    ///
    ///     let mut group = Group::new()?;
    ///     let access_counter = Builder::new().group(&mut group).kind(ACCESS).build()?;
    ///     let miss_counter = Builder::new().group(&mut group).kind(MISS).build()?;
    ///     # Ok(()) }
    ///
    /// [`Hardware`]: events::Hardware
    /// [`Software`]: events::Software
    /// [`Cache`]: events::Cache
    pub fn kind<K: Into<Event>>(mut self, kind: K) -> Builder<'a> {
        let kind = kind.into();
        self.attrs.type_ = kind.r#type();
        self.attrs.config = kind.config();
        self
    }

    /// Place the counter in the given [`Group`]. Groups allow a set of counters
    /// to be enabled, disabled, or read as a single atomic operation, so that
    /// the counts can be usefully compared.
    ///
    /// [`Group`]: struct.Group.html
    pub fn group(mut self, group: &'a mut Group) -> Builder<'a> {
        self.group = Some(group);

        // man page: "Members of a group are usually initialized with disabled
        // set to zero."
        self.attrs.set_disabled(0);

        self
    }

    /// Construct a [`Counter`] according to the specifications made on this
    /// `Builder`.
    ///
    /// A freshly built `Counter` is disabled. To begin counting events, you
    /// must call [`enable`] on the `Counter` or the `Group` to which it belongs.
    ///
    /// If the `Builder` requests features that the running kernel does not
    /// support, it returns `Err(e)` where `e.kind() == ErrorKind::Other` and
    /// `e.raw_os_error() == Some(libc::E2BIG)`.
    ///
    /// Unfortunately, problems in counter configuration are detected at this
    /// point, by the kernel, not earlier when the offending request is made on
    /// the `Builder`. The kernel's returned errors are not always helpful.
    ///
    /// [`Counter`]: struct.Counter.html
    /// [`enable`]: struct.Counter.html#method.enable
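    ///
    /// For example, a sketch of distinguishing that case from other failures
    /// (what you do about it is up to you):
    ///
    /// ```no_run
    /// # use perf_event::Builder;
    /// # fn main() -> std::io::Result<()> {
    /// match Builder::new().build() {
    ///     Ok(_counter) => { /* proceed with the counter */ }
    ///     Err(e) if e.raw_os_error() == Some(libc::E2BIG) => {
    ///         // The running kernel doesn't support something we asked for.
    ///     }
    ///     Err(e) => return Err(e),
    /// }
    /// # Ok(()) }
    /// ```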
    pub fn build(mut self) -> std::io::Result<Counter> {
        let cpu = match self.cpu {
            Some(cpu) => cpu as c_int,
            None => -1,
        };
        let (pid, flags) = self.who.as_args();
        let group_fd = match self.group {
            Some(ref mut g) => {
                g.max_members += 1;
                g.file.as_raw_fd() as c_int
            }
            None => -1,
        };

        let file = unsafe {
            File::from_raw_fd(check_errno_syscall(|| {
                sys::perf_event_open(&mut self.attrs, pid, cpu, group_fd, flags as c_ulong)
            })?)
        };

        // If we're going to be part of a Group, retrieve the ID the kernel
        // assigned us, so we can find our results in a Counts structure. Even
        // if we're not part of a group, we'll use it in `Debug` output.
        let mut id = 0_u64;
        check_errno_syscall(|| unsafe { sys::ioctls::ID(file.as_raw_fd(), &mut id) })?;

        Ok(Counter { file, id })
    }
}

impl Counter {
    /// Return this counter's kernel-assigned unique id.
    ///
    /// This can be useful when iterating over [`Counts`].
    ///
    /// [`Counts`]: struct.Counts.html
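    ///
    /// For example, a sketch of matching this id against the ids produced by
    /// iterating over a [`Counts`]:
    ///
    /// ```
    /// # use perf_event::{Builder, Group};
    /// # fn main() -> std::io::Result<()> {
    /// let mut group = Group::new()?;
    /// let insns = Builder::new().group(&mut group).build()?;
    /// let counts = group.read()?;
    /// for (id, value) in &counts {
    ///     if id == insns.id() {
    ///         println!("instructions: {}", value);
    ///     }
    /// }
    /// # Ok(()) }
    /// ```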
    pub fn id(&self) -> u64 {
        self.id
    }

    /// Allow this `Counter` to begin counting its designated event.
    ///
    /// This does not affect whatever value the `Counter` had previously; new
    /// events add to the current count. To clear a `Counter`, use the
    /// [`reset`] method.
    ///
    /// Note that `Group` also has an [`enable`] method, which enables all
    /// its member `Counter`s as a single atomic operation.
    ///
    /// [`reset`]: #method.reset
    /// [`enable`]: struct.Group.html#method.enable
    pub fn enable(&mut self) -> io::Result<()> {
        check_errno_syscall(|| unsafe { sys::ioctls::ENABLE(self.file.as_raw_fd(), 0) }).map(|_| ())
    }

    /// Make this `Counter` stop counting its designated event. Its count is
    /// unaffected.
    ///
    /// Note that `Group` also has a [`disable`] method, which disables all
    /// its member `Counter`s as a single atomic operation.
    ///
    /// [`disable`]: struct.Group.html#method.disable
    pub fn disable(&mut self) -> io::Result<()> {
        check_errno_syscall(|| unsafe { sys::ioctls::DISABLE(self.file.as_raw_fd(), 0) })
            .map(|_| ())
    }

    /// Reset the value of this `Counter` to zero.
    ///
    /// Note that `Group` also has a [`reset`] method, which resets all
    /// its member `Counter`s as a single atomic operation.
    ///
    /// [`reset`]: struct.Group.html#method.reset
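    ///
    /// For example, a sketch of reusing one counter for two separate
    /// measurements:
    ///
    /// ```
    /// # use perf_event::Builder;
    /// # fn main() -> std::io::Result<()> {
    /// let mut counter = Builder::new().build()?;
    ///
    /// counter.enable()?;
    /// // ... first region of interest ...
    /// counter.disable()?;
    /// let first = counter.read()?;
    ///
    /// // Start the second measurement from zero rather than from `first`.
    /// counter.reset()?;
    /// counter.enable()?;
    /// // ... second region of interest ...
    /// counter.disable()?;
    /// let second = counter.read()?;
    /// # let _ = (first, second);
    /// # Ok(()) }
    /// ```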
    pub fn reset(&mut self) -> io::Result<()> {
        check_errno_syscall(|| unsafe { sys::ioctls::RESET(self.file.as_raw_fd(), 0) }).map(|_| ())
    }

    /// Return this `Counter`'s current value as a `u64`.
    ///
    /// Consider using the [`read_count_and_time`] method instead of this one. Some
    /// counters are implemented in hardware, and the processor can support only
    /// a certain number running at a time. If more counters are requested than
    /// the hardware can support, the kernel timeshares them on the hardware.
    /// This method gives you no indication whether this has happened;
    /// `read_count_and_time` does.
    ///
    /// Note that `Group` also has a [`read`] method, which reads all
    /// its member `Counter`s' values at once.
    ///
    /// [`read`]: Group::read
    /// [`read_count_and_time`]: Counter::read_count_and_time
    pub fn read(&mut self) -> io::Result<u64> {
        Ok(self.read_count_and_time()?.count)
    }

    /// Return this `Counter`'s current value and timesharing data.
    ///
    /// Some counters are implemented in hardware, and the processor can run
    /// only a fixed number of them at a time. If more counters are requested
    /// than the hardware can support, the kernel timeshares them on the
    /// hardware.
    ///
    /// This method returns a [`CountAndTime`] struct, whose `count` field holds
    /// the counter's value, and whose `time_enabled` and `time_running` fields
    /// indicate how long you had enabled the counter, and how long the counter
    /// was actually scheduled on the processor. This lets you detect whether
    /// the counter was timeshared, and adjust your use accordingly. Times
    /// are reported in nanoseconds.
    ///
    ///     # use perf_event::Builder;
    ///     # fn main() -> std::io::Result<()> {
    ///     # let mut counter = Builder::new().build()?;
    ///     let cat = counter.read_count_and_time()?;
    ///     if cat.time_running == 0 {
    ///         println!("No data collected.");
    ///     } else if cat.time_running < cat.time_enabled {
    ///         // Note: this way of scaling is accurate, but `u128` division
    ///         // is usually implemented in software, which may be slow.
    ///         println!("{} instructions (estimated)",
    ///                  (cat.count as u128 *
    ///                   cat.time_enabled as u128 / cat.time_running as u128) as u64);
    ///     } else {
    ///         println!("{} instructions", cat.count);
    ///     }
    ///     # Ok(()) }
    ///
    /// Note that `Group` also has a [`read`] method, which reads all
    /// its member `Counter`s' values at once.
    ///
    /// [`read`]: Group::read
    pub fn read_count_and_time(&mut self) -> io::Result<CountAndTime> {
        let mut buf = [0_u64; 3];
        self.file.read_exact(u64::slice_as_bytes_mut(&mut buf))?;

        let cat = CountAndTime {
            count: buf[0],
            time_enabled: buf[1],
            time_running: buf[2],
        };

        // Does the kernel ever return nonsense?
        assert!(cat.time_running <= cat.time_enabled);

        Ok(cat)
    }
}

impl std::fmt::Debug for Counter {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            fmt,
            "Counter {{ fd: {}, id: {} }}",
            self.file.as_raw_fd(),
            self.id
        )
    }
}

impl Group {
    /// Construct a new, empty `Group`.
    #[allow(unused_parens)]
    pub fn new() -> io::Result<Group> {
        // Open a placeholder perf counter that we can add other events to.
        let mut attrs = perf_event_attr {
            size: std::mem::size_of::<perf_event_attr>() as u32,
            type_: sys::bindings::PERF_TYPE_SOFTWARE,
            config: sys::bindings::PERF_COUNT_SW_DUMMY as u64,
            ..perf_event_attr::default()
        };

        attrs.set_disabled(1);
        attrs.set_exclude_kernel(1);
        attrs.set_exclude_hv(1);

        // Arrange to be able to identify the counters we read back.
        attrs.read_format = (sys::bindings::PERF_FORMAT_TOTAL_TIME_ENABLED
            | sys::bindings::PERF_FORMAT_TOTAL_TIME_RUNNING
            | sys::bindings::PERF_FORMAT_ID
            | sys::bindings::PERF_FORMAT_GROUP) as u64;

        let file = unsafe {
            File::from_raw_fd(check_errno_syscall(|| {
                sys::perf_event_open(&mut attrs, 0, -1, -1, 0)
            })?)
        };

        // Retrieve the ID the kernel assigned us.
        let mut id = 0_u64;
        check_errno_syscall(|| unsafe { sys::ioctls::ID(file.as_raw_fd(), &mut id) })?;

        Ok(Group {
            file,
            id,
            max_members: 1,
        })
    }

    /// Allow all `Counter`s in this `Group` to begin counting their designated
    /// events, as a single atomic operation.
    ///
    /// This does not affect whatever values the `Counter`s had previously; new
    /// events add to the current counts. To clear the `Counter`s, use the
    /// [`reset`] method.
    ///
    /// [`reset`]: #method.reset
    pub fn enable(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::ENABLE)
    }

    /// Make all `Counter`s in this `Group` stop counting their designated
    /// events, as a single atomic operation. Their counts are unaffected.
    pub fn disable(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::DISABLE)
    }

    /// Reset all `Counter`s in this `Group` to zero, as a single atomic operation.
    pub fn reset(&mut self) -> io::Result<()> {
        self.generic_ioctl(sys::ioctls::RESET)
    }

    /// Perform some group ioctl.
    ///
    /// `f` must be a syscall that sets `errno` and returns `-1` on failure.
    fn generic_ioctl(&mut self, f: unsafe fn(c_int, c_uint) -> c_int) -> io::Result<()> {
        check_errno_syscall(|| unsafe {
            f(self.file.as_raw_fd(), sys::bindings::PERF_IOC_FLAG_GROUP)
        })
        .map(|_| ())
    }

    /// Return the values of all the `Counter`s in this `Group` as a [`Counts`]
    /// value.
    ///
    /// A `Counts` value is a map from specific `Counter`s to their values. You
    /// can find a specific `Counter`'s value by indexing:
    ///
    /// ```ignore
    /// let mut group = Group::new()?;
    /// let counter1 = Builder::new().group(&mut group).kind(...).build()?;
    /// let counter2 = Builder::new().group(&mut group).kind(...).build()?;
    /// ...
    /// let counts = group.read()?;
    /// println!("Rhombus inclinations per taxi medallion: {} / {} ({:.0}%)",
    ///          counts[&counter1],
    ///          counts[&counter2],
    ///          (counts[&counter1] as f64 / counts[&counter2] as f64) * 100.0);
    /// ```
    ///
    /// [`Counts`]: struct.Counts.html
    pub fn read(&mut self) -> io::Result<Counts> {
        // Since we passed `PERF_FORMAT_{ID,GROUP,TOTAL_TIME_{ENABLED,RUNNING}}`,
        // the data we'll read has the form:
        //
        //     struct read_format {
        //         u64 nr;            /* The number of events */
        //         u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
        //         u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
        //         struct {
        //             u64 value;     /* The value of the event */
        //             u64 id;        /* if PERF_FORMAT_ID */
        //         } values[nr];
        //     };
        let mut data = vec![0_u64; 3 + 2 * self.max_members];
        assert_eq!(
            self.file.read(u64::slice_as_bytes_mut(&mut data))?,
            std::mem::size_of_val(&data[..])
        );

        let counts = Counts { data };

        // CountsIter assumes that the group's dummy count appears first.
        assert_eq!(counts.nth_ref(0).0, self.id);

        // Does the kernel ever return nonsense?
        assert!(counts.time_running() <= counts.time_enabled());

        // Update `max_members` for the next read.
        self.max_members = counts.len();

        Ok(counts)
    }
}

impl std::fmt::Debug for Group {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            fmt,
            "Group {{ fd: {}, id: {} }}",
            self.file.as_raw_fd(),
            self.id
        )
    }
}

impl Counts {
    /// Return the number of counters this `Counts` holds results for.
    #[allow(clippy::len_without_is_empty)] // Groups are never empty.
    pub fn len(&self) -> usize {
        self.data[0] as usize
    }

    /// Return the number of nanoseconds the `Group` was enabled that
    /// contributed to this `Counts`' contents.
    pub fn time_enabled(&self) -> u64 {
        self.data[1]
    }

    /// Return the number of nanoseconds the `Group` was actually collecting
    /// counts that contributed to this `Counts`' contents.
    pub fn time_running(&self) -> u64 {
        self.data[2]
    }

    /// Return a range of indexes covering the count and id of the `n`'th counter.
    fn nth_index(n: usize) -> std::ops::Range<usize> {
        let base = 3 + 2 * n;
        base..base + 2
    }

    /// Return the id and count of the `n`'th counter. This returns a reference
    /// to the count, for use by the `Index` implementation.
    fn nth_ref(&self, n: usize) -> (u64, &u64) {
        let id_val = &self.data[Counts::nth_index(n)];

        // (id, &value)
        (id_val[1], &id_val[0])
    }
}

/// An iterator over the counter values in a [`Counts`], returned by
/// [`Group::read`].
///
/// Each item is a pair `(id, &value)`, where `id` is the number assigned to the
/// counter by the kernel (see `Counter::id`), and `value` is that counter's
/// value.
///
/// [`Counts`]: struct.Counts.html
/// [`Counter::id`]: struct.Counter.html#method.id
/// [`Group::read`]: struct.Group.html#method.read
pub struct CountsIter<'c> {
    counts: &'c Counts,
    next: usize,
}

impl<'c> Iterator for CountsIter<'c> {
    type Item = (u64, &'c u64);
    fn next(&mut self) -> Option<(u64, &'c u64)> {
        if self.next >= self.counts.len() {
            return None;
        }
        let result = self.counts.nth_ref(self.next);
        self.next += 1;
        Some(result)
    }
}

impl<'c> IntoIterator for &'c Counts {
    type Item = (u64, &'c u64);
    type IntoIter = CountsIter<'c>;
    fn into_iter(self) -> CountsIter<'c> {
        CountsIter {
            counts: self,
            next: 1, // skip the `Group` itself, it's just a dummy.
        }
    }
}

impl Counts {
    /// Return the value recorded for `member` in `self`, or `None` if `member`
    /// is not present.
    ///
    /// If you know that `member` is in the group, you can simply index:
    ///
    ///     # fn main() -> std::io::Result<()> {
    ///     # use perf_event::{Builder, Group};
    ///     # let mut group = Group::new()?;
    ///     # let cycle_counter = Builder::new().group(&mut group).build()?;
    ///     # let counts = group.read()?;
    ///     let cycles = counts[&cycle_counter];
    ///     # Ok(()) }
    pub fn get(&self, member: &Counter) -> Option<&u64> {
        self.into_iter()
            .find(|&(id, _)| id == member.id)
            .map(|(_, value)| value)
    }

    /// Return an iterator over the counts in `self`.
    ///
    ///     # fn main() -> std::io::Result<()> {
    ///     # use perf_event::Group;
    ///     # let counts = Group::new()?.read()?;
    ///     for (id, value) in &counts {
    ///         println!("Counter id {} has value {}", id, value);
    ///     }
    ///     # Ok(()) }
    ///
    /// Each item is a pair `(id, &value)`, where `id` is the number assigned to
    /// the counter by the kernel (see `Counter::id`), and `value` is that
    /// counter's value.
    pub fn iter(&self) -> CountsIter {
        <&Counts as IntoIterator>::into_iter(self)
    }
}

impl std::ops::Index<&Counter> for Counts {
    type Output = u64;
    fn index(&self, index: &Counter) -> &u64 {
        self.get(index).unwrap()
    }
}

impl std::fmt::Debug for Counts {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        fmt.debug_map().entries(self.into_iter()).finish()
    }
}

/// A type whose values can be safely accessed as a slice of bytes.
///
/// # Safety
///
/// `Self` must be a type such that storing a value in memory
/// initializes all the bytes of that memory, so that
/// `slice_as_bytes_mut` can never expose uninitialized bytes to the
/// caller.
unsafe trait SliceAsBytesMut: Sized {
    fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
        unsafe {
            std::slice::from_raw_parts_mut(
                slice.as_mut_ptr() as *mut u8,
                std::mem::size_of_val(slice),
            )
        }
    }
}

unsafe impl SliceAsBytesMut for u64 {}

/// Produce an `io::Result` from an errno-style system call.
///
/// An 'errno-style' system call is one that reports failure by returning -1 and
/// setting the C `errno` value when an error occurs.
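///
/// For example, a hypothetical call site (shown only to illustrate the calling
/// convention; the real call sites in this crate wrap `perf_event_open` and its
/// ioctls, and `fd` here stands in for any raw file descriptor):
///
/// ```ignore
/// check_errno_syscall(|| unsafe { libc::close(fd) })?;
/// ```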
fn check_errno_syscall<F, R>(f: F) -> io::Result<R>
where
    F: FnOnce() -> R,
    R: PartialOrd + Default,
{
    let result = f();
    if result < R::default() {
        Err(io::Error::last_os_error())
    } else {
        Ok(result)
    }
}

#[test]
fn simple_build() {
    Builder::new()
        .build()
        .expect("Couldn't build default Counter");
}

#[test]
#[cfg(target_os = "linux")]
fn test_error_code_is_correct() {
    // This configuration should always result in EINVAL
    let builder = Builder::new()
        // CPU_CLOCK is literally always supported so we don't have to worry
        // about test failures when in VMs.
        .kind(events::Software::CPU_CLOCK)
        // There should _hopefully_ never be a system with this many CPUs.
        .one_cpu(i32::MAX as usize);

    match builder.build() {
        Ok(_) => panic!("counter construction was not supposed to succeed"),
        Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EINVAL)),
    }
}