perf_event/lib.rs

//! A performance monitoring API for Linux.
//!
//! This crate provides access to processor and kernel counters for things like
//! instruction completions, cache references and misses, branch predictions,
//! context switches, page faults, and so on.
//!
//! For example, to compare the number of clock cycles elapsed with the number
//! of instructions completed during one call to `println!`:
//!
//! ```
//! use perf_event::events::Hardware;
//! use perf_event::{Builder, Group};
//!
//! # fn main() -> std::io::Result<()> {
//! // A `Group` lets us enable and disable several counters atomically.
//! let mut group = Group::new()?;
//! let cycles = group.add(&Builder::new(Hardware::CPU_CYCLES))?;
//! let insns = group.add(&Builder::new(Hardware::INSTRUCTIONS))?;
//!
//! let vec = (0..=51).collect::<Vec<_>>();
//!
//! group.enable()?;
//! println!("{:?}", vec);
//! group.disable()?;
//!
//! let counts = group.read()?;
//! println!(
//!     "cycles / instructions: {} / {} ({:.2} cpi)",
//!     counts[&cycles],
//!     counts[&insns],
//!     (counts[&cycles] as f64 / counts[&insns] as f64)
//! );
//!
//! Ok(())
//! # }
//! ```
//!
//! This crate is built on top of the Linux [`perf_event_open`][man] system
//! call; that documentation has the authoritative explanations of exactly what
//! all the counters mean.
//!
//! There are two main types for measurement:
//!
//! - A [`Counter`] is an individual counter. Use [`Builder`] to construct one
//!   (see the short example below).
//!
//! - A [`Group`] is a collection of counters that can be enabled and disabled
//!   atomically, so that they cover exactly the same period of execution,
//!   allowing meaningful comparisons of the individual values. You can
//!   construct one via [`Group::new`] or use [`Builder`] to construct one with
//!   custom settings.
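//!
//! For example, a minimal sketch of a standalone [`Counter`] (no group
//! involved), mirroring the grouped example above:
//!
//! ```
//! use perf_event::events::Hardware;
//! use perf_event::Builder;
//!
//! # fn main() -> std::io::Result<()> {
//! let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
//! counter.enable()?;
//! // ... the code to measure ...
//! counter.disable()?;
//! println!("instructions retired: {}", counter.read()?);
//! Ok(())
//! # }
//! ```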
//!
//! If you're familiar with the kernel API already:
//!
//! - A `Builder` holds the arguments to a `perf_event_open` call: a `struct
//!   perf_event_attr` and a few other fields.
//!
//! - `Counter` and `Group` objects are just event file descriptors, together
//!   with their kernel id numbers, and some other details you need to actually
//!   use them. They're different types because they yield different types of
//!   results, and because you can't retrieve a `Group`'s counts without knowing
//!   how many members it has.
//!
//! ### Call for PRs
//!
//! Linux's `perf_event_open` API can report all sorts of things this crate
//! doesn't yet understand: stack traces, logs of executable and shared library
//! activity, tracepoints, kprobes, uprobes, and so on. And beyond the counters
//! in the kernel header files, there are others that can only be found at
//! runtime by consulting `sysfs`, specific to particular processors and
//! devices. For example, modern Intel processors have counters that measure
//! power consumption in Joules.
//!
//! If you find yourself in need of something this crate doesn't support, please
//! consider submitting a pull request.
//!
//! [man]: https://www.mankier.com/2/perf_event_open

#![cfg_attr(debug_assertions, warn(missing_docs))]
#![cfg_attr(not(debug_assertions), deny(missing_docs))]
// The bitflags macro is generating this lint internally.
#![allow(clippy::assign_op_pattern)]

/// A helper macro for silencing warnings when a type is only implemented so
/// that it can be linked in the docs.
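///
/// For example, `used_in_docs!(Counter);` expands to a hidden module containing
/// `use super::Counter;`, which marks the type as used without exporting it.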
macro_rules! used_in_docs {
    ($t:ident) => {
        const _: () = {
            // Using a module here means that this macro can accept any identifier that
            // would normally be used in an import statement.
            mod use_item {
                #[allow(unused_imports)]
                use super::$t;
            }
        };
    };
}

use std::convert::TryInto;
use std::fs::File;
use std::os::fd::{AsRawFd, IntoRawFd, RawFd};
use std::time::Duration;
use std::{fmt, io};

use crate::data::endian::Native;
use crate::data::parse::ParseConfig;
use crate::sys::bindings::PERF_IOC_FLAG_GROUP;
use crate::sys::ioctls;

pub mod events;

mod builder;
mod flags;
mod group;
mod group_data;
mod sampler;

// Make sure the examples in the readme are tested.
#[doc = include_str!("../README.md")]
mod readme {}

#[cfg(feature = "hooks")]
pub mod hooks;

// When the `"hooks"` feature is not enabled, call directly into
// `perf-event-open-sys`.
// When the `"hooks"` feature is enabled, `sys` functions allow for
// interposed functions that provide simulated results for testing.
#[cfg(feature = "hooks")]
use hooks::sys;
/// Support for parsing data contained within `Record`s.
///
/// Note that this module is actually just the [`perf-event-data`][ped] crate.
/// The documentation has been inlined here for convenience.
// TODO: Directly linking to the crate causes an ICE in rustdoc. It is fixed in
//       nightly but not in the latest stable.
///
/// [ped]: http://docs.rs/perf-event-data
///
/// # perf-event-data
#[doc(inline)]
pub use perf_event_data as data;
#[cfg(not(feature = "hooks"))]
use perf_event_open_sys as sys;

pub use crate::builder::{Builder, UnsupportedOptionsError};
#[doc(inline)]
pub use crate::data::{ReadFormat, SampleFlags as SampleFlag};
pub use crate::flags::{Clock, SampleBranchFlag, SampleSkid};
pub use crate::group::Group;
pub use crate::group_data::{GroupData, GroupEntry, GroupIter};
pub use crate::sampler::{Record, Sampler, UserReadData};

/// A counter for a single kernel or hardware event.
///
/// A `Counter` represents a single performance monitoring counter. When
/// building the counter you select the event you would like it to count. Once
/// the counter is created, you can enable or disable it, call its [`read`]
/// method to retrieve its current value, and reset it to zero.
///
/// # Groups
/// The kernel allows counters to be grouped together. A group of counters is
/// scheduled onto the CPU as a unit. This allows you to directly compare the
/// values collected by multiple counters.
///
/// There are two ways to go about working with groups:
/// - Use the [`Group`] type. It is not configurable but it makes groups easy
///   to set up and use.
/// - Pick one `Counter` to be a group leader, create the other counters with
///   [`Builder::build_with_group`] and use [`enable_group`], [`disable_group`],
///   and [`reset_group`] on any of its members to control the group. To read
///   all counters in the group at once you'll need to create at least one
///   counter with [`ReadFormat::GROUP`] so that [`read_group`] will read the
///   entire group (see the sketch after this list).
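///
/// For example, a minimal sketch of the leader-based approach, reading both
/// counters through the leader:
/// ```
/// use perf_event::events::Hardware;
/// use perf_event::{Builder, ReadFormat};
///
/// let mut leader = Builder::new(Hardware::INSTRUCTIONS)
///     .read_format(ReadFormat::GROUP)
///     .build()?;
/// let cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut leader)?;
///
/// leader.enable_group()?;
/// // ... the code to measure ...
/// leader.disable_group()?;
///
/// let counts = leader.read_group()?;
/// println!("instructions: {}", counts[&leader]);
/// println!("cycles:       {}", counts[&cycles]);
/// # std::io::Result::Ok(())
/// ```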
///
/// [`read`]: crate::Counter::read
/// [`read_group`]: Self::read_group
/// [`reset_group`]: Self::reset_group
/// [`enable_group`]: Self::enable_group
/// [`disable_group`]: Self::disable_group
pub struct Counter {
    /// The file descriptor for this counter, returned by `perf_event_open`.
    ///
    /// When a `Counter` is dropped, this `File` is dropped, and the kernel
    /// removes the counter from any group it belongs to.
    file: File,

    /// The unique id assigned to this counter by the kernel.
    id: u64,

    /// The parse config used by this counter.
    config: ParseConfig<Native>,

    /// If we are a `Group`, then this is the count of how many members we have.
    member_count: u32,
}

impl Counter {
    /// Common initialization code shared between counters and groups.
    pub(crate) fn new_internal(file: File, config: ParseConfig<Native>) -> std::io::Result<Self> {
        let mut counter = Self {
            file,
            id: 0,
            config,
            member_count: 1,
        };

        // If we are part of a group then the id is used to find our results in
        // the group's `GroupData`. Otherwise, it's just used for debug output.
        let mut id = 0;
        counter.ioctl(|fd| unsafe { ioctls::ID(fd, &mut id) })?;
        counter.id = id;

        Ok(counter)
    }

    /// Return this counter's kernel-assigned unique id.
    pub fn id(&self) -> u64 {
        self.id
    }

    /// The [`ParseConfig`] for this `Counter`.
    pub fn config(&self) -> &ParseConfig<Native> {
        &self.config
    }

    /// Allow this `Counter` to begin counting its designated event.
    ///
    /// This does not affect whatever value the `Counter` had previously; new
    /// events add to the current count. To clear a `Counter`, use [`reset`].
    ///
    /// Note that, depending on how it was configured, a counter may start off
    /// enabled or be enabled automatically by the kernel at a later point.
    /// For example, setting [`enable_on_exec`] will cause this counter to be
    /// automatically enabled when the current process calls `execve(2)`.
    ///
    /// If you want to enable all counters in the same group as this one then
    /// use [`enable_group`] instead.
    ///
    /// # Examples
    /// Enable an individual counter:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
    /// counter.enable()?;
    /// // ...
    /// assert_ne!(counter.read()?, 0);
    /// # std::io::Result::Ok(())
    /// ```
    ///
    /// [`Group`]: crate::Group
    /// [`reset`]: Self::reset
    /// [`enable_group`]: Self::enable_group
    /// [`enable_on_exec`]: crate::Builder::enable_on_exec
    pub fn enable(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::ENABLE(fd, 0) })
    }

    /// Enable all counters in the same group as this one.
    ///
    /// This does not affect whatever value the `Counter` had previously; new
    /// events add to the current count. To clear a counter group, use
    /// [`reset_group`].
    ///
    /// See [`enable`] for the version that only applies to the current
    /// counter.
    ///
    /// # Examples
    /// Enable all counters in a group:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::{Builder, Group};
    ///
    /// let mut group = Group::new()?;
    /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut group)?;
    /// cycles.enable_group()?;
    /// // ...
    /// assert_ne!(cycles.read()?, 0);
    /// # std::io::Result::Ok(())
    /// ```
    ///
    /// [`enable`]: Self::enable
    /// [`reset_group`]: Self::reset_group
    pub fn enable_group(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::ENABLE(fd, PERF_IOC_FLAG_GROUP) })
    }

    /// Make this `Counter` stop counting its designated event.
    ///
    /// This does not affect the value of this `Counter`.
    ///
    /// To disable all counters in the group use
    /// [`disable_group`](Self::disable_group).
    ///
    /// # Examples
    /// Disable a single counter:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
    /// counter.enable()?;
    ///
    /// // Counter is continuously updating
    /// let val1 = counter.read()?;
    /// let val2 = counter.read()?;
    /// counter.disable()?;
    ///
    /// // Counter is no longer updating
    /// let val3 = counter.read()?;
    /// let val4 = counter.read()?;
    ///
    /// assert_ne!(val1, val2);
    /// assert_eq!(val3, val4);
    /// # std::io::Result::Ok(())
    /// ```
    ///
    /// [`Group`]: crate::Group
    /// [`disable`]: struct.Group.html#method.disable
    pub fn disable(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::DISABLE(fd, 0) })
    }

    /// Disable all counters in the same group as this one.
    ///
    /// This does not affect the counter values.
    ///
    /// To disable only this counter use [`disable`].
    ///
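    /// # Examples
    /// A minimal sketch that disables a whole group through one of its
    /// members:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut instrs = Builder::new(Hardware::INSTRUCTIONS).build()?;
    /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut instrs)?;
    ///
    /// instrs.enable_group()?;
    /// // ...
    /// cycles.disable_group()?;
    ///
    /// // Neither counter is updating any more.
    /// let (i1, c1) = (instrs.read()?, cycles.read()?);
    /// let (i2, c2) = (instrs.read()?, cycles.read()?);
    /// assert_eq!(i1, i2);
    /// assert_eq!(c1, c2);
    /// # std::io::Result::Ok(())
    /// ```
    ///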
    /// [`disable`]: Self::disable
    pub fn disable_group(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::DISABLE(fd, PERF_IOC_FLAG_GROUP) })
    }

    /// Reset the value of this `Counter` to zero.
    ///
    /// To reset the value of all counters in the current group use
    /// [`reset_group`](Self::reset_group).
    ///
    /// # Examples
    /// Reset a single counter:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
    /// counter.enable()?;
    /// // ...
    /// counter.disable()?;
    ///
    /// assert_ne!(counter.read()?, 0);
    /// counter.reset()?;
    /// assert_eq!(counter.read()?, 0);
    /// # std::io::Result::Ok(())
    /// ```
    pub fn reset(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::RESET(fd, 0) })
    }

    /// Reset the value of all counters in the same group as this one to zero.
    ///
    /// To only reset the value of this counter use [`reset`](Self::reset).
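    ///
    /// # Examples
    /// A minimal sketch that resets every counter in a group:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut instrs = Builder::new(Hardware::INSTRUCTIONS).build()?;
    /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut instrs)?;
    ///
    /// instrs.enable_group()?;
    /// // ...
    /// instrs.disable_group()?;
    ///
    /// instrs.reset_group()?;
    /// assert_eq!(instrs.read()?, 0);
    /// assert_eq!(cycles.read()?, 0);
    /// # std::io::Result::Ok(())
    /// ```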
    pub fn reset_group(&mut self) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::RESET(fd, PERF_IOC_FLAG_GROUP) })
    }

    /// Attach an eBPF program to this counter.
    ///
    /// This will only work if this counter was created as a kprobe
    /// tracepoint event.
    ///
    /// This method corresponds to the `IOC_SET_BPF` ioctl.
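    ///
    /// # Examples
    /// A sketch only; it assumes `bpf_prog_fd` is the file descriptor of a BPF
    /// program that has already been loaded by other means:
    /// ```no_run
    /// # use perf_event::Counter;
    /// # use std::os::fd::RawFd;
    /// # fn attach(counter: &mut Counter, bpf_prog_fd: RawFd) -> std::io::Result<()> {
    /// counter.set_bpf(bpf_prog_fd)?;
    /// # Ok(())
    /// # }
    /// ```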
    pub fn set_bpf(&mut self, bpf: RawFd) -> io::Result<()> {
        self.ioctl(|fd| unsafe { ioctls::SET_BPF(fd, bpf as _) })
            .map(drop)
    }

    /// Map a buffer for samples from this counter, returning a [`Sampler`]
    /// that can be used to access them.
    ///
    /// There are some restrictions on the size of the mapped buffer. To
    /// accommodate this, `map_len` is always rounded up to the next
    /// power-of-two multiple of the system page size. There will always
    /// be at least two pages allocated for the ring buffer: one for the
    /// control data structures, and one for actual data.
    ///
    /// # Example
    /// This example shows creating a sampler to record mmap events within the
    /// current process. If you do this early enough, you can then track which
    /// libraries your process is loading.
    /// ```
    /// use perf_event::events::Software;
    /// use perf_event::Builder;
    ///
    /// let mut sampler = Builder::new(Software::DUMMY)
    ///     .mmap(true)
    ///     .build()?
    ///     .sampled(128)?;
    /// # std::io::Result::Ok(())
    /// ```
    pub fn sampled(self, map_len: usize) -> io::Result<Sampler> {
        let pagesize =
            check_errno_syscall(|| unsafe { libc::sysconf(libc::_SC_PAGESIZE) })? as usize;

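        // Per the doc comment above: one page for the ring buffer's control
        // data structures, plus a power-of-two data area of at least one page.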
        let len = pagesize
            + map_len
                .checked_next_power_of_two()
                .unwrap_or((usize::MAX >> 1) + 1)
                .max(pagesize);

        let mmap = memmap2::MmapOptions::new().len(len).map_raw(&self.file)?;

        Ok(Sampler::new(self, mmap))
    }

    /// Helper function for doing ioctls on a counter.
    pub(crate) fn ioctl<F>(&self, ioctl: F) -> io::Result<()>
    where
        F: FnOnce(RawFd) -> libc::c_int,
    {
        check_errno_syscall(|| ioctl(self.as_raw_fd())).map(drop)
    }
}

impl Counter {
    /// Return this `Counter`'s current value as a `u64`.
    ///
    /// Consider using [`read_full`] or (if read_format has the required flags)
    /// [`read_count_and_time`] instead. There are limitations around how
    /// many hardware counters can be on a single CPU at a time. If more
    /// counters are requested than the hardware can support then the kernel
    /// will timeshare them on the hardware. Looking at just the counter value
    /// gives you no indication that this has happened.
    ///
    /// If you would like to read the values for an entire group then you will
    /// need to use [`read_group`] (and set [`ReadFormat::GROUP`]) instead.
    ///
    /// [`read_full`]: Self::read_full
    /// [`read_group`]: Self::read_group
    /// [`read_count_and_time`]: Self::read_count_and_time
    /// [`ReadFormat::GROUP`]: ReadFormat::GROUP
    ///
    /// # Errors
    /// This function may return errors in the following notable cases:
    /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
    ///   built with does not match the format of the data. This can also occur
    ///   if `read_format` contained options not supported by this crate.
    /// - If the counter is part of a group and was unable to be pinned to the
    ///   CPU then reading will return an error with kind [`UnexpectedEof`].
    ///
    /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
    /// if the file descriptor is closed).
    ///
    /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
    ///
    /// # Example
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::Builder;
    ///
    /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).enabled(true).build()?;
    ///
    /// let instrs = counter.read()?;
    /// # std::io::Result::Ok(())
    /// ```
    pub fn read(&mut self) -> io::Result<u64> {
        Ok(self.read_full()?.count())
    }

    /// Return all data that this `Counter` is configured to provide.
    ///
    /// The exact fields that are returned within the [`CounterData`] struct
    /// depend on what was specified for `read_format` when constructing this
    /// counter. This method is the only one that gives access to all values
    /// returned by the kernel.
    ///
    /// If this `Counter` was created with [`ReadFormat::GROUP`] then this will
    /// read the entire group but only return the data for this specific
    /// counter.
    ///
    /// # Errors
    /// This function may return errors in the following notable cases:
    /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
    ///   built with does not match the format of the data. This can also occur
    ///   if `read_format` contained options not supported by this crate.
    /// - If the counter is part of a group and was unable to be pinned to the
    ///   CPU then reading will return an error with kind [`UnexpectedEof`].
    ///
    /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
    /// if the file descriptor is closed).
    ///
    /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
    ///
    /// # Example
    /// ```
    /// use std::time::Duration;
    ///
    /// use perf_event::events::Hardware;
    /// use perf_event::{Builder, ReadFormat};
    ///
    /// let mut counter = Builder::new(Hardware::INSTRUCTIONS)
    ///     .read_format(ReadFormat::TOTAL_TIME_RUNNING)
    ///     .enabled(true)
    ///     .build()?;
    /// // ...
    /// let data = counter.read_full()?;
    /// let instructions = data.count();
    /// let time_running = data.time_running().unwrap();
    /// let ips = instructions as f64 / time_running.as_secs_f64();
    ///
    /// println!("instructions/s: {ips}");
    /// # std::io::Result::Ok(())
    /// ```
    pub fn read_full(&mut self) -> io::Result<CounterData> {
        if !self.is_group() {
            return self.do_read_single();
        }

        let group = self.do_read_group()?;
        let entry = group.get(self).unwrap();
        let data = crate::data::ReadValue::from_group_and_entry(&group.data, &entry.0);

        Ok(CounterData(data))
    }

    /// Read the values of all the counters in the current group.
    ///
    /// Note that unless [`ReadFormat::GROUP`] was specified when building this
    /// `Counter` this will only read the data for the current `Counter`.
    ///
    /// # Errors
    /// This function may return errors in the following notable cases:
    /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
    ///   built with does not match the format of the data. This can also occur
    ///   if `read_format` contained options not supported by this crate.
    /// - If the counter is part of a group and was unable to be pinned to the
    ///   CPU then reading will return an error with kind [`UnexpectedEof`].
    ///
    /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
    /// if the file descriptor is closed).
    ///
    /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
    ///
    /// # Example
    /// Compute the CPI for a region of code:
    /// ```
    /// use perf_event::events::Hardware;
    /// use perf_event::{Builder, ReadFormat};
    ///
    /// let mut instrs = Builder::new(Hardware::INSTRUCTIONS)
    ///     .read_format(ReadFormat::GROUP)
    ///     .build()?;
    /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut instrs)?;
    ///
    /// instrs.enable_group()?;
    /// // ...
    /// instrs.disable_group()?;
    ///
    /// let data = instrs.read_group()?;
    /// let instrs = data[&instrs];
    /// let cycles = data[&cycles];
    ///
    /// println!("CPI: {}", cycles as f64 / instrs as f64);
    /// # std::io::Result::Ok(())
    /// ```
    pub fn read_group(&mut self) -> io::Result<GroupData> {
        if self.is_group() {
            self.do_read_group()
        } else {
            Ok(GroupData::new(self.do_read_single()?.0.into()))
        }
    }

    /// Return this `Counter`'s current value and timesharing data.
    ///
    /// Some counters are implemented in hardware, and the processor can run
    /// only a fixed number of them at a time. If more counters are requested
    /// than the hardware can support, the kernel timeshares them on the
    /// hardware.
    ///
    /// This method returns a [`CountAndTime`] struct, whose `count` field holds
    /// the counter's value, and whose `time_enabled` and `time_running` fields
    /// indicate how long you had enabled the counter, and how long the counter
    /// was actually scheduled on the processor. This lets you detect whether
    /// the counter was timeshared, and adjust your use accordingly. Times
    /// are reported in nanoseconds.
    ///
    /// # Errors
    /// See the [man page][man] for possible errors when reading from the
    /// counter. This method will also return an error if `read_format` does
    /// not include both [`TOTAL_TIME_ENABLED`] and [`TOTAL_TIME_RUNNING`].
    ///
    /// # Example
    /// ```
    /// # use perf_event::Builder;
    /// # use perf_event::events::Software;
    /// #
    /// # let mut counter = Builder::new(Software::DUMMY).build()?;
    /// let cat = counter.read_count_and_time()?;
    /// if cat.time_running == 0 {
    ///     println!("No data collected.");
    /// } else if cat.time_running < cat.time_enabled {
    ///     // Note: this way of scaling is accurate, but `u128` division
    ///     // is usually implemented in software, which may be slow.
    ///     println!(
    ///         "{} instructions (estimated)",
    ///         (cat.count as u128 * cat.time_enabled as u128 / cat.time_running as u128) as u64
    ///     );
    /// } else {
    ///     println!("{} instructions", cat.count);
    /// }
    /// # std::io::Result::Ok(())
    /// ```
    ///
    /// Note that `Group` also has a [`read`] method, which reads all
    /// its member `Counter`s' values at once.
    ///
    /// [`read`]: crate::Group::read
    /// [`TOTAL_TIME_ENABLED`]: ReadFormat::TOTAL_TIME_ENABLED
    /// [`TOTAL_TIME_RUNNING`]: ReadFormat::TOTAL_TIME_RUNNING
    /// [man]: https://www.mankier.com/2/perf_event_open
    pub fn read_count_and_time(&mut self) -> io::Result<CountAndTime> {
        let data = self.read_full()?;

        Ok(CountAndTime {
            count: data.count(),
            time_enabled: data
                .time_enabled()
                .ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        "time_enabled was not enabled within read_format",
                    )
                })?
                .as_nanos() as _,
            time_running: data
                .time_running()
                .ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::Other,
                        "time_running was not enabled within read_format",
                    )
                })?
                .as_nanos() as _,
        })
    }

    fn is_group(&self) -> bool {
        self.config.read_format().contains(ReadFormat::GROUP)
    }

    /// Actual read implementation for when `ReadFormat::GROUP` is not set.
    fn do_read_single(&mut self) -> io::Result<CounterData> {
        use std::io::Read;
        use std::mem::size_of;

        use crate::flags::ReadFormatExt;

        debug_assert!(!self.is_group());

        let mut data = [0u8; ReadFormat::MAX_NON_GROUP_SIZE * size_of::<u64>()];
        let len = self.file.read(&mut data)?;

        if len == 0 {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "the kernel was unable to schedule the counter or group",
            ));
        }

        let mut parser = crate::data::parse::Parser::new(&data[..len], self.config.clone());
        let value: crate::data::ReadValue = parser
            .parse()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;

        Ok(CounterData(value))
    }

    /// Actual read implementation for when `ReadFormat::GROUP` is set.
    fn do_read_group(&mut self) -> io::Result<GroupData> {
        use std::io::Read;
        use std::mem::size_of;

        use crate::data::ReadGroup;
        use crate::flags::ReadFormatExt;

        // The general structure format looks like this, depending on what
        // read_format flags were enabled.
        //
        // struct read_format {
        //     u64 nr;            /* The number of events */
        //     u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
        //     u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
        //     struct {
        //         u64 value;     /* The value of the event */
        //         u64 id;        /* if PERF_FORMAT_ID */
        //         u64 lost;      /* if PERF_FORMAT_LOST */
        //     } values[nr];
        // };
        let read_format = self.config.read_format();
        let prefix_len = read_format.prefix_len();
        let element_len = read_format.element_len();

        let mut elements = (self.member_count as usize).max(1);
        let mut data = vec![0u8; (prefix_len + elements * element_len) * size_of::<u64>()];

        // Backoff loop to try and get the correct size.
        //
        // There's no way to know when new counters are added to the current
        // group, so to make sure reads succeed we expand the buffer whenever
        // we get ENOSPC until the read completes.
        //
        // The next time around self.member_count will be set to the correct
        // count and we won't need to go through this loop multiple times.
        let len = loop {
            match self.file.read(&mut data) {
                Ok(len) => break len,
                Err(e) if e.raw_os_error() == Some(libc::ENOSPC) => {
                    elements *= 2;
                    data.resize((prefix_len + elements * element_len) * size_of::<u64>(), 0);
                }
                Err(e) => return Err(e),
            }
        };

        if len == 0 {
            return Err(io::Error::new(
                io::ErrorKind::UnexpectedEof,
                "the kernel was unable to schedule the counter or group",
            ));
        }

        data.truncate(len);
        let mut parser = crate::data::parse::Parser::new(data.as_slice(), self.config.clone());
        let data: ReadGroup = parser
            .parse::<ReadGroup>()
            .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
            .into_owned();

        let data = GroupData::new(data);

        self.member_count = data
            .len()
            .try_into()
            .expect("group had more than u32::MAX elements");

        Ok(data)
    }
}

impl AsRawFd for Counter {
    fn as_raw_fd(&self) -> RawFd {
        self.file.as_raw_fd()
    }
}

impl IntoRawFd for Counter {
    fn into_raw_fd(self) -> RawFd {
        self.file.into_raw_fd()
    }
}

impl AsRef<Counter> for &'_ Counter {
    fn as_ref(&self) -> &Counter {
        self
    }
}

impl AsMut<Counter> for &'_ mut Counter {
    fn as_mut(&mut self) -> &mut Counter {
        self
    }
}

impl fmt::Debug for Counter {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Counter")
            .field("fd", &self.as_raw_fd())
            .field("id", &self.id())
            .finish_non_exhaustive()
    }
}

/// The data retrieved by reading from a [`Counter`].
#[derive(Clone, Debug)]
pub struct CounterData(crate::data::ReadValue);

impl CounterData {
    /// The counter value.
    ///
    /// The meaning of this value depends on how the counter was configured when
    /// it was built; see [`Builder`].
    pub fn count(&self) -> u64 {
        self.0.value()
    }

    /// How long this counter was enabled by the program.
    ///
    /// This will be present if [`ReadFormat::TOTAL_TIME_ENABLED`] was
    /// specified in `read_format` when the counter was built.
    pub fn time_enabled(&self) -> Option<Duration> {
        self.0.time_enabled().map(Duration::from_nanos)
    }

    /// How long the kernel actually ran this counter.
    ///
    /// If `time_enabled == time_running` then the counter ran for the entire
    /// period it was enabled, without interruption. Otherwise, the counter
    /// shared the underlying hardware with others and you should adjust its
    /// value accordingly.
    ///
    /// This will be present if [`ReadFormat::TOTAL_TIME_RUNNING`] was
    /// specified in `read_format` when the counter was built.
    pub fn time_running(&self) -> Option<Duration> {
        self.0.time_running().map(Duration::from_nanos)
    }

    /// The number of lost samples of this event.
    ///
    /// This will be present if [`ReadFormat::LOST`] was specified in
    /// `read_format` when the counter was built.
    pub fn lost(&self) -> Option<u64> {
        self.0.lost()
    }
}

/// The value of a counter, along with timesharing data.
///
/// Some counters are implemented in hardware, and the processor can run
/// only a fixed number of them at a time. If more counters are requested
/// than the hardware can support, the kernel timeshares them on the
/// hardware.
///
/// This struct holds the value of a counter, together with the time it was
/// enabled, and the proportion of that for which it was actually running.
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct CountAndTime {
    /// The counter value.
    ///
    /// The meaning of this field depends on how the counter was configured when
    /// it was built; see [`Builder`].
    pub count: u64,

    /// How long this counter was enabled by the program, in nanoseconds.
    pub time_enabled: u64,

    /// How long the kernel actually ran this counter, in nanoseconds.
    ///
    /// If `time_enabled == time_running`, then the counter ran for the entire
    /// period it was enabled, without interruption. Otherwise, the counter
    /// shared the underlying hardware with others, and you should prorate its
    /// value accordingly.
    pub time_running: u64,
}

/// Produce an `io::Result` from an errno-style system call.
///
/// An 'errno-style' system call is one that reports failure by returning -1 and
/// setting the C `errno` value when an error occurs.
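///
/// For example, `Counter::sampled` uses it to wrap `sysconf`:
/// `check_errno_syscall(|| unsafe { libc::sysconf(libc::_SC_PAGESIZE) })?`.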
fn check_errno_syscall<F, R>(f: F) -> io::Result<R>
where
    F: FnOnce() -> R,
    R: PartialOrd + Default,
{
    let result = f();
    if result < R::default() {
        Err(io::Error::last_os_error())
    } else {
        Ok(result)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn simple_build() {
        Builder::new(crate::events::Software::DUMMY)
            .build()
            .expect("Couldn't build default Counter");
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn test_error_code_is_correct() {
        // This configuration should always result in EINVAL

        // CPU_CLOCK is literally always supported so we don't have to worry
        // about test failures when in VMs.
        let builder = Builder::new(events::Software::CPU_CLOCK)
            // There should _hopefully_ never be a system with this many CPUs.
            .one_cpu(i32::MAX as usize)
            .clone();

        match builder.build() {
            Ok(_) => panic!("counter construction was not supposed to succeed"),
            Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EINVAL)),
        }
    }

    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_sampler_rdpmc() {
        let mut sampler = Builder::new(events::Hardware::INSTRUCTIONS)
            .enabled(true)
            .build()
            .expect("failed to build counter")
            .sampled(1024)
            .expect("failed to build sampler");

        let read = sampler.read_user();
        sampler.disable().unwrap();
        let value = sampler.read_full().unwrap();

        assert!(read.time_running() <= value.time_running().unwrap());
        assert!(read.time_enabled() <= value.time_enabled().unwrap());

        if let Some(count) = read.count() {
            assert!(count <= value.count(), "{count} <= {}", value.count());
        }
    }
931}