perf_event/lib.rs
1//! A performance monitoring API for Linux.
2//!
3//! This crate provides access to processor and kernel counters for things like
4//! instruction completions, cache references and misses, branch predictions,
5//! context switches, page faults, and so on.
6//!
7//! For example, to compare the number of clock cycles elapsed with the number
8//! of instructions completed during one call to `println!`:
9//!
10//! ```
11//! use perf_event::events::Hardware;
12//! use perf_event::{Builder, Group};
13//!
14//! # fn main() -> std::io::Result<()> {
15//! // A `Group` lets us enable and disable several counters atomically.
16//! let mut group = Group::new()?;
17//! let cycles = group.add(&Builder::new(Hardware::CPU_CYCLES))?;
18//! let insns = group.add(&Builder::new(Hardware::INSTRUCTIONS))?;
19//!
20//! let vec = (0..=51).collect::<Vec<_>>();
21//!
22//! group.enable()?;
23//! println!("{:?}", vec);
24//! group.disable()?;
25//!
26//! let counts = group.read()?;
27//! println!(
28//! "cycles / instructions: {} / {} ({:.2} cpi)",
29//! counts[&cycles],
30//! counts[&insns],
31//! (counts[&cycles] as f64 / counts[&insns] as f64)
32//! );
33//!
34//! Ok(())
35//! # }
36//! ```
37//!
38//! This crate is built on top of the Linux [`perf_event_open`][man] system
39//! call; that documentation has the authoritative explanations of exactly what
40//! all the counters mean.
41//!
42//! There are two main types for measurement:
43//!
44//! - A [`Counter`] is an individual counter. Use [`Builder`] to construct one.
45//!
46//! - A [`Group`] is a collection of counters that can be enabled and disabled
47//! atomically, so that they cover exactly the same period of execution,
48//! allowing meaningful comparisons of the individual values. You can
49//! construct one via [`Group::new`] or use [`Builder`] to construct one with
50//! custom settings.
51//!
52//! If you're familiar with the kernel API already:
53//!
54//! - A `Builder` holds the arguments to a `perf_event_open` call: a `struct
55//! perf_event_attr` and a few other fields.
56//!
57//! - `Counter` and `Group` objects are just event file descriptors, together
58//! with their kernel id numbers, and some other details you need to actually
59//! use them. They're different types because they yield different types of
60//! results, and because you can't retrieve a `Group`'s counts without knowing
61//! how many members it has.
62//!
63//! ### Call for PRs
64//!
65//! Linux's `perf_event_open` API can report all sorts of things this crate
66//! doesn't yet understand: stack traces, logs of executable and shared library
67//! activity, tracepoints, kprobes, uprobes, and so on. And beyond the counters
68//! in the kernel header files, there are others that can only be found at
69//! runtime by consulting `sysfs`, specific to particular processors and
70//! devices. For example, modern Intel processors have counters that measure
71//! power consumption in Joules.
72//!
73//! If you find yourself in need of something this crate doesn't support, please
74//! consider submitting a pull request.
75//!
76//! [man]: https://www.mankier.com/2/perf_event_open
77
78#![cfg_attr(debug_assertions, warn(missing_docs))]
79#![cfg_attr(not(debug_assertions), deny(missing_docs))]
80// The bitflags macro is generating this lint internally.
81#![allow(clippy::assign_op_pattern)]
82
/// Helper macro that silences warnings for a type which only exists so that
/// it can be linked to from the documentation.
///
/// Invoke it at item level with the bare identifier of the type.
macro_rules! used_in_docs {
    ($item:ident) => {
        const _: () = {
            // The import is placed inside a module of its own, which lets the
            // macro accept any identifier that could normally appear in an
            // import statement.
            mod docs_only_import {
                #[allow(unused_imports)]
                use super::$item;
            }
        };
    };
}
97
98use std::convert::TryInto;
99use std::fs::File;
100use std::os::fd::{AsRawFd, IntoRawFd, RawFd};
101use std::time::Duration;
102use std::{fmt, io};
103
104use crate::data::endian::Native;
105use crate::data::parse::ParseConfig;
106use crate::sys::bindings::PERF_IOC_FLAG_GROUP;
107use crate::sys::ioctls;
108
109pub mod events;
110
111mod builder;
112mod flags;
113mod group;
114mod group_data;
115mod sampler;
116
117// Make sure the examples in the readme are tested.
118#[doc = include_str!("../README.md")]
119mod readme {}
120
121#[cfg(feature = "hooks")]
122pub mod hooks;
123
124// When the `"hooks"` feature is not enabled, call directly into
125// `perf-event-open-sys`.
126// When the `"hooks"` feature is enabled, `sys` functions allow for
127// interposed functions that provide simulated results for testing.
128#[cfg(feature = "hooks")]
129use hooks::sys;
130/// Support for parsing data contained within `Record`s.
131///
132/// Note that this module is actually just the [`perf-event-data`][ped] crate.
133/// The documentation has been inlined here for convenience.
134// TODO: Directly linking to the crate causes an ICE in rustdoc. It is fixed in
135// nightly but not in the latest stable.
136///
137/// [ped]: http://docs.rs/perf-event-data
138///
139/// # perf-event-data
140#[doc(inline)]
141pub use perf_event_data as data;
142#[cfg(not(feature = "hooks"))]
143use perf_event_open_sys as sys;
144
145pub use crate::builder::{Builder, UnsupportedOptionsError};
146#[doc(inline)]
147pub use crate::data::{ReadFormat, SampleFlags as SampleFlag};
148pub use crate::flags::{Clock, SampleBranchFlag, SampleSkid};
149pub use crate::group::Group;
150pub use crate::group_data::{GroupData, GroupEntry, GroupIter};
151pub use crate::sampler::{Record, Sampler, UserReadData};
152
/// A counter for a single kernel or hardware event.
///
/// A counter represents a single performance monitoring counter. While
/// building the counter - via [`Builder`] - you select the event you would
/// like it to count. Once the counter is created, then you can enable or
/// disable it, call its [`read`] method to retrieve its current value, and
/// reset it to zero.
///
/// # Groups
/// The kernel allows for counters to be grouped together. A group of counters
/// will be scheduled onto the CPU as a unit. This allows you to directly
/// compare the values collected by multiple counters.
///
/// There are two ways to go about working with groups:
/// - Use the [`Group`] type. It is not configurable but it makes groups easy to
///   setup and use.
/// - Pick one `Counter` to be a group leader, create the other counters with
///   [`Builder::build_with_group`] and use [`enable_group`], [`disable_group`],
///   and [`reset_group`] on any of its members to control the group. To read
///   all counters in the group at once you'll need to create at least one
///   counter with [`ReadFormat::GROUP`] so that [`read_group`] will read the
///   entire group.
///
/// [`read`]: crate::Counter::read
/// [`read_group`]: Self::read_group
/// [`reset_group`]: Self::reset_group
/// [`enable_group`]: Self::enable_group
/// [`disable_group`]: Self::disable_group
pub struct Counter {
    /// The file descriptor for this counter, returned by `perf_event_open`.
    ///
    /// When a `Counter` is dropped, this `File` is dropped, and the kernel
    /// removes the counter from any group it belongs to.
    file: File,

    /// The unique id assigned to this counter by the kernel.
    ///
    /// It is queried once, when the counter is constructed.
    id: u64,

    /// The parse config used by this counter.
    ///
    /// Its `read_format` decides whether reads use the single-counter or the
    /// group layout.
    config: ParseConfig<Native>,

    /// If we are a `Group`, then this is the count of how many members we have.
    ///
    /// This starts at 1 and is refreshed after every successful group read. It
    /// is only used as the initial size estimate for the group read buffer.
    member_count: u32,
}
202
203impl Counter {
204 /// Common initialization code shared between counters and groups.
205 pub(crate) fn new_internal(file: File, config: ParseConfig<Native>) -> std::io::Result<Self> {
206 let mut counter = Self {
207 file,
208 id: 0,
209 config,
210 member_count: 1,
211 };
212
213 // If we are part of a group then the id is used to find results in the
214 // Counts structure. Otherwise, it's just used for debug output.
215 let mut id = 0;
216 counter.ioctl(|fd| unsafe { ioctls::ID(fd, &mut id) })?;
217 counter.id = id;
218
219 Ok(counter)
220 }
221
    /// Return this counter's kernel-assigned unique id.
    ///
    /// The value is cached when the counter is constructed, so calling this
    /// does not perform a system call.
    pub fn id(&self) -> u64 {
        self.id
    }
226
    /// The [`ParseConfig`] for this `Counter`.
    ///
    /// This is the same configuration that the read methods hand to the parser
    /// when decoding data read from the counter.
    pub fn config(&self) -> &ParseConfig<Native> {
        &self.config
    }
231
232 /// Allow this `Counter` to begin counting its designated event.
233 ///
234 /// This does not affect whatever value the `Counter` had previously; new
235 /// events add to the current count. To clear a `Counter`, use [`reset`].
236 ///
237 /// Note that, depending on how it was configured, a counter may start off
238 /// enabled or be automatically enabled by the kernel when an event occurs.
239 /// For example, setting [`enable_on_exec`] will cause this counter to be
240 /// automatically enabled when the current process calls `execve(2)`.
241 ///
242 /// If you want to enable all counters in the same group as this one then
243 /// use [`enable_group`] instead.
244 ///
245 /// # Examples
246 /// Enable an individual counter:
247 /// ```
248 /// use perf_event::events::Hardware;
249 /// use perf_event::Builder;
250 ///
251 /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
252 /// counter.enable()?;
253 /// // ...
254 /// assert_ne!(counter.read()?, 0);
255 /// # std::io::Result::Ok(())
256 /// ```
257 ///
258 /// [`Group`]: crate::Group
259 /// [`reset`]: Self::reset
260 /// [`enable_group`]: Self::enable_group
261 /// [`enable_on_exec`]: crate::Builder::enable_on_exec
    pub fn enable(&mut self) -> io::Result<()> {
        // An argument of 0 (i.e. without `PERF_IOC_FLAG_GROUP`) applies the
        // ioctl to this counter only; `enable_group` is the group-wide variant.
        self.ioctl(|fd| unsafe { ioctls::ENABLE(fd, 0) })
    }
265
266 /// Enable all counters in the same group as this one.
267 ///
268 /// This does not affect whatever value the `Counter` had previously; new
269 /// events add to the current count. To clear a counter group, use
270 /// [`reset_group`].
271 ///
272 /// See [`enable`] for the version that only applies to the current
273 /// counter.
274 ///
275 /// # Examples
276 /// Enable all counters in a group:
277 /// ```
278 /// use perf_event::events::Hardware;
279 /// use perf_event::{Builder, Group};
280 ///
281 /// let mut group = Group::new()?;
282 /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut group)?;
283 /// group.enable()?;
284 /// // ...
285 /// assert_ne!(cycles.read()?, 0);
286 /// # std::io::Result::Ok(())
287 /// ```
288 ///
289 /// [`enable`]: Self::enable
290 /// [`reset_group`]: Self::reset_group
    pub fn enable_group(&mut self) -> io::Result<()> {
        // `PERF_IOC_FLAG_GROUP` is what extends the ioctl from this counter to
        // every counter in its group.
        self.ioctl(|fd| unsafe { ioctls::ENABLE(fd, PERF_IOC_FLAG_GROUP) })
    }
294
295 /// Make this `Counter` stop counting its designated event.
296 ///
297 /// This does not affect the value of this `Counter`.
298 ///
299 /// To disable all counters in the group use
300 /// [`disable_group`](Self::disable_group).
301 ///
302 /// # Examples
303 /// Disable a single counter:
304 /// ```
305 /// use perf_event::events::Hardware;
306 /// use perf_event::Builder;
307 ///
308 /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
309 /// counter.enable()?;
310 ///
311 /// // Counter is continuously updating
312 /// let val1 = counter.read()?;
313 /// let val2 = counter.read()?;
314 /// counter.disable()?;
315 ///
316 /// // Counter is no longer updating
317 /// let val3 = counter.read()?;
318 /// let val4 = counter.read()?;
319 ///
320 /// assert_ne!(val1, val2);
321 /// assert_eq!(val3, val4);
322 /// # std::io::Result::Ok(())
323 /// ```
324 ///
325 /// [`Group`]: crate::Group
326 /// [`disable`]: struct.Group.html#method.disable
    pub fn disable(&mut self) -> io::Result<()> {
        // An argument of 0 (i.e. without `PERF_IOC_FLAG_GROUP`) applies the
        // ioctl to this counter only; `disable_group` is the group-wide variant.
        self.ioctl(|fd| unsafe { ioctls::DISABLE(fd, 0) })
    }
330
331 /// Disable all counters in the same group as this one.
332 ///
333 /// This does not affect the counter values.
334 ///
335 /// To disable only this counter use [`disable`].
336 ///
337 /// [`disable`]: Self::disable
    pub fn disable_group(&mut self) -> io::Result<()> {
        // `PERF_IOC_FLAG_GROUP` is what extends the ioctl from this counter to
        // every counter in its group.
        self.ioctl(|fd| unsafe { ioctls::DISABLE(fd, PERF_IOC_FLAG_GROUP) })
    }
341
342 /// Reset the value of this `Counter` to zero.
343 ///
344 /// To reset the value of all counters in the current group use
345 /// [`reset_group`](Self::reset_group).
346 ///
347 /// # Examples
348 /// Reset a single counter
349 /// ```
350 /// use perf_event::events::Hardware;
351 /// use perf_event::Builder;
352 ///
353 /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).build()?;
354 /// counter.enable()?;
355 /// // ...
356 /// counter.disable()?;
357 ///
358 /// assert_ne!(counter.read()?, 0);
359 /// counter.reset()?;
360 /// assert_eq!(counter.read()?, 0);
361 /// # std::io::Result::Ok(())
362 /// ```
    pub fn reset(&mut self) -> io::Result<()> {
        // An argument of 0 (i.e. without `PERF_IOC_FLAG_GROUP`) applies the
        // ioctl to this counter only; `reset_group` is the group-wide variant.
        self.ioctl(|fd| unsafe { ioctls::RESET(fd, 0) })
    }
366
367 /// Reset the value of all counters in the same group as this one to zero.
368 ///
369 /// To only reset the value of this counter use [`reset`](Self::reset).
    pub fn reset_group(&mut self) -> io::Result<()> {
        // `PERF_IOC_FLAG_GROUP` is what extends the ioctl from this counter to
        // every counter in its group.
        self.ioctl(|fd| unsafe { ioctls::RESET(fd, PERF_IOC_FLAG_GROUP) })
    }
373
374 /// Attach an eBPF program to this counter.
375 ///
376 /// This will only work if this counter was created as a kprobe
377 /// tracepoint event.
378 ///
379 /// This method corresponds to the `IOC_SET_BPF` ioctl.
380 pub fn set_bpf(&mut self, bpf: RawFd) -> io::Result<()> {
381 self.ioctl(|fd| unsafe { ioctls::SET_BPF(fd, bpf as _) })
382 .map(drop)
383 }
384
385 /// Map a buffer for samples from this counter, returning a [`Sampler`]
386 /// that can be used to access them.
387 ///
    /// There are some restrictions on the size of the mapped buffer. To
    /// accommodate this `map_len` will always be rounded up to the next
    /// power-of-two multiple of the system page size. There will always
    /// be at least two pages allocated for the ring buffer: one for the
    /// control data structures, and one for actual data.
393 ///
394 /// # Example
    /// This example shows creating a sampler to record mmap events within the
    /// current process. If you do this early enough, you can then track what
    /// libraries your process is loading.
398 /// ```
399 /// use perf_event::events::Software;
400 /// use perf_event::Builder;
401 ///
402 /// let mut sampler = Builder::new(Software::DUMMY)
403 /// .mmap(true)
404 /// .build()?
405 /// .sampled(128)?;
406 /// # std::io::Result::Ok(())
407 /// ```
408 pub fn sampled(self, map_len: usize) -> io::Result<Sampler> {
409 let pagesize =
410 check_errno_syscall(|| unsafe { libc::sysconf(libc::_SC_PAGESIZE) })? as usize;
411
412 let len = pagesize
413 + map_len
414 .checked_next_power_of_two()
415 .unwrap_or((usize::MAX >> 1) + 1)
416 .max(pagesize);
417
418 let mmap = memmap2::MmapOptions::new().len(len).map_raw(&self.file)?;
419
420 Ok(Sampler::new(self, mmap))
421 }
422
423 /// Helper function for doing ioctls on a counter.
424 pub(crate) fn ioctl<F>(&self, ioctl: F) -> io::Result<()>
425 where
426 F: FnOnce(RawFd) -> libc::c_int,
427 {
428 check_errno_syscall(|| ioctl(self.as_raw_fd())).map(drop)
429 }
430}
431
432impl Counter {
433 /// Return this `Counter`'s current value as a `u64`.
434 ///
435 /// Consider using [`read_full`] or (if read_format has the required flags)
436 /// [`read_count_and_time`] instead. There are limitations around how
437 /// many hardware counters can be on a single CPU at a time. If more
438 /// counters are requested than the hardware can support then the kernel
439 /// will timeshare them on the hardware. Looking at just the counter value
440 /// gives you no indication that this has happened.
441 ///
442 /// If you would like to read the values for an entire group then you will
443 /// need to use [`read_group`] (and set [`ReadFormat::GROUP`]) instead.
444 ///
445 /// [`read_full`]: Self::read_full
446 /// [`read_group`]: Self::read_group
447 /// [`read_count_and_time`]: Self::read_count_and_time
448 /// [`ReadFormat::GROUP`]: ReadFormat::GROUP
449 ///
450 /// # Errors
451 /// This function may return errors in the following notable cases:
452 /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
453 /// built with does not match the format of the data. This can also occur
454 /// if `read_format` contained options not supported by this crate.
455 /// - If the counter is part of a group and was unable to be pinned to the
456 /// CPU then reading will return an error with kind [`UnexpectedEof`].
457 ///
458 /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
459 /// if the file descriptor is closed).
460 ///
461 /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
462 ///
463 /// # Example
464 /// ```
465 /// use perf_event::events::Hardware;
466 /// use perf_event::Builder;
467 ///
468 /// let mut counter = Builder::new(Hardware::INSTRUCTIONS).enabled(true).build()?;
469 ///
470 /// let instrs = counter.read()?;
471 /// # std::io::Result::Ok(())
472 /// ```
473 pub fn read(&mut self) -> io::Result<u64> {
474 Ok(self.read_full()?.count())
475 }
476
477 /// Return all data that this `Counter` is configured to provide.
478 ///
479 /// The exact fields that are returned within the [`CounterData`] struct
480 /// depend on what was specified for `read_format` when constructing this
481 /// counter. This method is the only one that gives access to all values
482 /// returned by the kernel.
483 ///
484 /// If this `Counter` was created with [`ReadFormat::GROUP`] then this will
485 /// read the entire group but only return the data for this specific
486 /// counter.
487 ///
488 /// # Errors
489 /// This function may return errors in the following notable cases:
490 /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
491 /// built with does not match the format of the data. This can also occur
492 /// if `read_format` contained options not supported by this crate.
493 /// - If the counter is part of a group and was unable to be pinned to the
494 /// CPU then reading will return an error with kind [`UnexpectedEof`].
495 ///
496 /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
497 /// if the file descriptor is closed).
498 ///
499 /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
500 ///
501 /// # Example
502 /// ```
503 /// use std::time::Duration;
504 ///
505 /// use perf_event::events::Hardware;
506 /// use perf_event::{Builder, ReadFormat};
507 ///
508 /// let mut counter = Builder::new(Hardware::INSTRUCTIONS)
509 /// .read_format(ReadFormat::TOTAL_TIME_RUNNING)
510 /// .enabled(true)
511 /// .build()?;
512 /// // ...
513 /// let data = counter.read_full()?;
514 /// let instructions = data.count();
515 /// let time_running = data.time_running().unwrap();
516 /// let ips = instructions as f64 / time_running.as_secs_f64();
517 ///
518 /// println!("instructions/s: {ips}");
519 /// # std::io::Result::Ok(())
520 /// ```
521 pub fn read_full(&mut self) -> io::Result<CounterData> {
522 if !self.is_group() {
523 return self.do_read_single();
524 }
525
526 let group = self.do_read_group()?;
527 let entry = group.get(self).unwrap();
528 let data = crate::data::ReadValue::from_group_and_entry(&group.data, &entry.0);
529
530 Ok(CounterData(data))
531 }
532
533 /// Read the values of all the counters in the current group.
534 ///
535 /// Note that unless [`ReadFormat::GROUP`] was specified when building this
536 /// `Counter` this will only read the data for the current `Counter`.
537 ///
538 /// # Errors
539 /// This function may return errors in the following notable cases:
540 /// - `ENOSPC` is returned if the `read_format` that this `Counter` was
541 /// built with does not match the format of the data. This can also occur
542 /// if `read_format` contained options not supported by this crate.
543 /// - If the counter is part of a group and was unable to be pinned to the
544 /// CPU then reading will return an error with kind [`UnexpectedEof`].
545 ///
546 /// Other errors are also possible under unexpected conditions (e.g. `EBADF`
547 /// if the file descriptor is closed).
548 ///
549 /// [`UnexpectedEof`]: io::ErrorKind::UnexpectedEof
550 ///
551 /// # Example
552 /// Compute the CPI for a region of code:
553 /// ```
554 /// use perf_event::events::Hardware;
555 /// use perf_event::{Builder, ReadFormat};
556 ///
557 /// let mut instrs = Builder::new(Hardware::INSTRUCTIONS)
558 /// .read_format(ReadFormat::GROUP)
559 /// .build()?;
560 /// let mut cycles = Builder::new(Hardware::CPU_CYCLES).build_with_group(&mut instrs)?;
561 ///
562 /// instrs.enable_group()?;
563 /// // ...
564 /// instrs.disable_group()?;
565 ///
566 /// let data = instrs.read_group()?;
567 /// let instrs = data[&instrs];
568 /// let cycles = data[&cycles];
569 ///
570 /// println!("CPI: {}", cycles as f64 / instrs as f64);
571 /// # std::io::Result::Ok(())
572 /// ```
573 pub fn read_group(&mut self) -> io::Result<GroupData> {
574 if self.is_group() {
575 self.do_read_group()
576 } else {
577 Ok(GroupData::new(self.do_read_single()?.0.into()))
578 }
579 }
580
581 /// Return this `Counter`'s current value and timesharing data.
582 ///
583 /// Some counters are implemented in hardware, and the processor can run
584 /// only a fixed number of them at a time. If more counters are requested
585 /// than the hardware can support, the kernel timeshares them on the
586 /// hardware.
587 ///
588 /// This method returns a [`CountAndTime`] struct, whose `count` field holds
589 /// the counter's value, and whose `time_enabled` and `time_running` fields
590 /// indicate how long you had enabled the counter, and how long the counter
591 /// was actually scheduled on the processor. This lets you detect whether
592 /// the counter was timeshared, and adjust your use accordingly. Times
593 /// are reported in nanoseconds.
594 ///
595 /// # Errors
596 /// See the [man page][man] for possible errors when reading from the
597 /// counter. This method will also return an error if `read_format` does
598 /// not include both [`TOTAL_TIME_ENABLED`] and [`TOTAL_TIME_RUNNING`].
599 ///
600 /// # Example
601 /// ```
602 /// # use perf_event::Builder;
603 /// # use perf_event::events::Software;
604 /// #
605 /// # let mut counter = Builder::new(Software::DUMMY).build()?;
606 /// let cat = counter.read_count_and_time()?;
607 /// if cat.time_running == 0 {
608 /// println!("No data collected.");
609 /// } else if cat.time_running < cat.time_enabled {
610 /// // Note: this way of scaling is accurate, but `u128` division
611 /// // is usually implemented in software, which may be slow.
612 /// println!(
613 /// "{} instructions (estimated)",
614 /// (cat.count as u128 * cat.time_enabled as u128 / cat.time_running as u128) as u64
615 /// );
616 /// } else {
617 /// println!("{} instructions", cat.count);
618 /// }
619 /// # std::io::Result::Ok(())
620 /// ```
621 ///
622 /// Note that `Group` also has a [`read`] method, which reads all
623 /// its member `Counter`s' values at once.
624 ///
625 /// [`read`]: crate::Group::read
626 /// [`TOTAL_TIME_ENABLED`]: ReadFormat::TOTAL_TIME_ENABLED
627 /// [`TOTAL_TIME_RUNNING`]: ReadFormat::TOTAL_TIME_RUNNING
628 /// [man]: https://www.mankier.com/2/perf_event_open
629 pub fn read_count_and_time(&mut self) -> io::Result<CountAndTime> {
630 let data = self.read_full()?;
631
632 Ok(CountAndTime {
633 count: data.count(),
634 time_enabled: data
635 .time_enabled()
636 .ok_or_else(|| {
637 io::Error::new(
638 io::ErrorKind::Other,
639 "time_enabled was not enabled within read_format",
640 )
641 })?
642 .as_nanos() as _,
643 time_running: data
644 .time_running()
645 .ok_or_else(|| {
646 io::Error::new(
647 io::ErrorKind::Other,
648 "time_running was not enabled within read_format",
649 )
650 })?
651 .as_nanos() as _,
652 })
653 }
654
    /// Whether this counter was configured with [`ReadFormat::GROUP`].
    ///
    /// The read methods use this to choose between the single-counter and the
    /// group read implementations.
    fn is_group(&self) -> bool {
        self.config.read_format().contains(ReadFormat::GROUP)
    }
658
659 /// Actual read implementation for when `ReadFormat::GROUP` is not set.
660 fn do_read_single(&mut self) -> io::Result<CounterData> {
661 use std::io::Read;
662 use std::mem::size_of;
663
664 use crate::flags::ReadFormatExt;
665
666 debug_assert!(!self.is_group());
667
668 let mut data = [0u8; ReadFormat::MAX_NON_GROUP_SIZE * size_of::<u64>()];
669 let len = self.file.read(&mut data)?;
670
671 if len == 0 {
672 return Err(io::Error::new(
673 io::ErrorKind::UnexpectedEof,
674 "the kernel was unable to schedule the counter or group",
675 ));
676 }
677
678 let mut parser = crate::data::parse::Parser::new(&data[..len], self.config.clone());
679 let value: crate::data::ReadValue = parser
680 .parse()
681 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
682
683 Ok(CounterData(value))
684 }
685
686 /// Actual read implementation for when `ReadFormat::GROUP` is set.
687 fn do_read_group(&mut self) -> io::Result<GroupData> {
688 use std::io::Read;
689 use std::mem::size_of;
690
691 use crate::data::ReadGroup;
692 use crate::flags::ReadFormatExt;
693
694 // The general structure format looks like this, depending on what
695 // read_format flags were enabled.
696 //
697 // struct read_format {
698 // u64 nr; /* The number of events */
699 // u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
700 // u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
701 // struct {
702 // u64 value; /* The value of the event */
703 // u64 id; /* if PERF_FORMAT_ID */
704 // u64 lost; /* if PERF_FORMAT_LOST */
705 // } values[nr];
706 // };
707 let read_format = self.config.read_format();
708 let prefix_len = read_format.prefix_len();
709 let element_len = read_format.element_len();
710
711 let mut elements = (self.member_count as usize).max(1);
712 let mut data = vec![0u8; (prefix_len + elements * element_len) * size_of::<u64>()];
713
714 // Backoff loop to try and get the correct size.
715 //
716 // There's no way to know when new counters are added to the current
717 // group, so to make sure reads succeed we expand the buffer whenever
718 // we get ENOSPC until the read completes.
719 //
720 // The next time around self.member_count will be set to the correct
721 // count and we won't need to go through this loop multiple times.
722 let len = loop {
723 match self.file.read(&mut data) {
724 Ok(len) => break len,
725 Err(e) if e.raw_os_error() == Some(libc::ENOSPC) => {
726 elements *= 2;
727 data.resize((prefix_len + elements * element_len) * size_of::<u64>(), 0);
728 }
729 Err(e) => return Err(e),
730 }
731 };
732
733 if len == 0 {
734 return Err(io::Error::new(
735 io::ErrorKind::UnexpectedEof,
736 "the kernel was unable to schedule the counter or group",
737 ));
738 }
739
740 data.truncate(len);
741 let mut parser = crate::data::parse::Parser::new(data.as_slice(), self.config.clone());
742 let data: ReadGroup = parser
743 .parse::<ReadGroup>()
744 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?
745 .into_owned();
746
747 let data = GroupData::new(data);
748
749 self.member_count = data
750 .len()
751 .try_into()
752 .expect("group had more than u32::MAX elements");
753
754 Ok(data)
755 }
756}
757
758impl AsRawFd for Counter {
759 fn as_raw_fd(&self) -> RawFd {
760 self.file.as_raw_fd()
761 }
762}
763
764impl IntoRawFd for Counter {
765 fn into_raw_fd(self) -> RawFd {
766 self.file.into_raw_fd()
767 }
768}
769
770impl AsRef<Counter> for &'_ Counter {
771 fn as_ref(&self) -> &Counter {
772 self
773 }
774}
775
776impl AsMut<Counter> for &'_ mut Counter {
777 fn as_mut(&mut self) -> &mut Counter {
778 self
779 }
780}
781
782impl fmt::Debug for Counter {
783 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
784 f.debug_struct("Counter")
785 .field("fd", &self.as_raw_fd())
786 .field("id", &self.id())
787 .finish_non_exhaustive()
788 }
789}
790
/// The data retrieved by reading from a [`Counter`].
///
/// This wraps the parsed read value. Use the accessor methods to get at the
/// count and at whichever optional fields were requested via `read_format`.
#[derive(Clone, Debug)]
pub struct CounterData(crate::data::ReadValue);
794
795impl CounterData {
796 /// The counter value.
797 ///
798 /// The meaning of this field depends on how the counter was configured when
799 /// it was built; see ['Builder'].
800 pub fn count(&self) -> u64 {
801 self.0.value()
802 }
803
804 /// How long this counter was enabled by the program.
805 ///
806 /// This will be present if [`ReadFormat::TOTAL_TIME_ENABLED`] was
807 /// specified in `read_format` when the counter was built.
808 pub fn time_enabled(&self) -> Option<Duration> {
809 self.0.time_enabled().map(Duration::from_nanos)
810 }
811
812 /// How long the kernel actually ran this counter.
813 ///
814 /// If `time_enabled == time_running` then the counter ran for the entire
815 /// period it was enabled, without interruption. Otherwise, the counter
816 /// shared the underlying hardware with others and you should adjust its
817 /// value accordingly.
818 ///
819 /// This will be present if [`ReadFormat::TOTAL_TIME_RUNNING`] was
820 /// specified in `read_format` when the counter was built.
821 pub fn time_running(&self) -> Option<Duration> {
822 self.0.time_running().map(Duration::from_nanos)
823 }
824
825 /// The number of lost samples of this event.
826 ///
827 /// This will be present if [`ReadFormat::LOST`] was specified in
828 /// `read_format` when the counter was built.
829 pub fn lost(&self) -> Option<u64> {
830 self.0.lost()
831 }
832}
833
/// The value of a counter, along with timesharing data.
///
/// Some counters are implemented in hardware, and the processor can run
/// only a fixed number of them at a time. If more counters are requested
/// than the hardware can support, the kernel timeshares them on the
/// hardware.
///
/// This struct holds the value of a counter, together with the time it was
/// enabled, and the proportion of that for which it was actually running.
///
/// Values can be compared and hashed, and the default value has every field
/// set to zero.
#[repr(C)]
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct CountAndTime {
    /// The counter value.
    ///
    /// The meaning of this field depends on how the counter was configured when
    /// it was built; see [`Builder`].
    pub count: u64,

    /// How long this counter was enabled by the program, in nanoseconds.
    pub time_enabled: u64,

    /// How long the kernel actually ran this counter, in nanoseconds.
    ///
    /// If `time_enabled == time_running`, then the counter ran for the entire
    /// period it was enabled, without interruption. Otherwise, the counter
    /// shared the underlying hardware with others, and you should prorate its
    /// value accordingly.
    pub time_running: u64,
}
863
/// Turn the outcome of an errno-style system call into an `io::Result`.
///
/// An 'errno-style' system call signals failure by returning -1 and storing
/// the reason in the C `errno` variable. `f` performs the call; if its result
/// compares less than `R::default()` the error is built from the current OS
/// error, otherwise the result is passed through unchanged.
fn check_errno_syscall<F, R>(f: F) -> io::Result<R>
where
    F: FnOnce() -> R,
    R: PartialOrd + Default,
{
    match f() {
        failed if failed < R::default() => Err(io::Error::last_os_error()),
        ok => Ok(ok),
    }
}
880
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a counter with default settings can be built at all.
    #[test]
    fn simple_build() {
        Builder::new(crate::events::Software::DUMMY)
            .build()
            .expect("Couldn't build default Counter");
    }

    /// A failed build must surface the kernel's error code unchanged.
    #[test]
    #[cfg(target_os = "linux")]
    fn test_error_code_is_correct() {
        // This configuration should always result in EINVAL

        // CPU_CLOCK is literally always supported so we don't have to worry
        // about test failures when in VMs.
        let builder = Builder::new(events::Software::CPU_CLOCK)
            // There should _hopefully_ never be a system with this many CPUs.
            .one_cpu(i32::MAX as usize)
            .clone();

        match builder.build() {
            Ok(_) => panic!("counter construction was not supposed to succeed"),
            Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EINVAL)),
        }
    }

    /// A user-space read taken before a full read must never report larger
    /// values than the full read does.
    #[test]
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn test_sampler_rdpmc() {
        let mut sampler = Builder::new(events::Hardware::INSTRUCTIONS)
            .enabled(true)
            .build()
            .expect("failed to build counter")
            .sampled(1024)
            .expect("failed to build sampler");

        // The user-space read happens first; the counter is then disabled and
        // read in full for comparison.
        let read = sampler.read_user();
        sampler.disable().unwrap();
        let value = sampler.read_full().unwrap();

        // NOTE(review): the `unwrap`s below presumably rely on the builder's
        // default `read_format` including both time fields - confirm.
        assert!(read.time_running() <= value.time_running().unwrap());
        assert!(read.time_enabled() <= value.time_enabled().unwrap());

        // The user-space read may yield no count at all; only compare when it
        // does.
        if let Some(count) = read.count() {
            assert!(count <= value.count(), "{count} <= {}", value.count());
        }
    }
}