perf_event/
builder.rs

1use std::fmt;
2use std::fs::File;
3use std::io::{self, ErrorKind};
4use std::os::raw::{c_int, c_ulong};
5use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
6use std::panic::{RefUnwindSafe, UnwindSafe};
7use std::sync::Arc;
8
9use libc::pid_t;
10use perf_event_data::parse::ParseConfig;
11use perf_event_open_sys::bindings;
12
13use crate::events::{Event, EventData};
14use crate::sys::bindings::perf_event_attr;
15use crate::{
16    check_errno_syscall, sys, Clock, Counter, Group, ReadFormat, SampleBranchFlag, SampleFlag,
17    SampleSkid,
18};
19
20/// A builder for [`Counter`]s.
21///
22/// There are dozens of parameters that influence a `Counter`'s behavior.
23/// `Builder` lets you construct a `Counter` by specifying only those parameters
24/// for which you don't want the default value.
25///
26/// A freshly built `Counter` is disabled. To begin counting events, you must
27/// call [`enable`] on the `Counter` or the `Group` to which it belongs.
28///
29/// For example, if you want a `Counter` for instructions retired by the current
30/// process, those are `Builder`'s defaults, so you need only write:
31///
32/// ```
33/// # use perf_event::Builder;
34/// # use perf_event::events::Hardware;
35/// #
36/// let mut insns = Builder::new(Hardware::INSTRUCTIONS).build()?;
37/// # std::io::Result::Ok(())
38/// ```
39///
40/// If you would like to gather individual counters into a [`Group`] you can
41/// use the [`Group::add`] method. A [`Group`] allows you to enable or disable
42/// all the grouped counters atomically.
43///
44/// ```
45/// # use perf_event::{Builder, Group};
46/// # use perf_event::events::Hardware;
47/// #
48/// let mut group = Group::new()?;
49/// let cycles = group.add(&Builder::new(Hardware::CPU_CYCLES))?;
50/// let insns = group.add(&Builder::new(Hardware::INSTRUCTIONS))?;
51/// #
52/// # std::io::Result::Ok(())
53/// ```
54///
55/// Other methods let you select:
56///
57/// - specific processes or cgroups to observe
58/// - specific CPU cores to observe
59///
60/// `Builder` supports only a fraction of the many knobs and dials Linux offers,
61/// but hopefully it will acquire methods to support more of them as time goes
62/// on.
63///
64/// Internally, a `Builder` is just a wrapper around the kernel's `struct
65/// perf_event_attr` type.
66///
67/// [`enable`]: Counter::enable
#[derive(Clone)]
pub struct Builder<'a> {
    // The kernel attribute struct that is handed to `perf_event_open` when a
    // counter is built.
    attrs: perf_event_attr,
    // Which process, cgroup, or "any process" target the counter observes.
    who: EventPid<'a>,
    // The CPU to restrict observation to. `None` is passed to the kernel as
    // `-1` when the counter is built.
    cpu: Option<usize>,

    // Some events need to hold onto data that is referenced in the builder.
    // The perf_event_attr struct obviously doesn't have lifetimes so the only
    // safe solution is to have the builder hold onto it.
    event_data: Option<Arc<dyn EventData>>,
}
79
// Needed for backwards compat: these marker impls are written by hand so that
// `Builder` is unwind-safe regardless of its fields.
// NOTE(review): presumably the `Arc<dyn EventData>` field is what prevents the
// auto traits from being derived automatically - confirm.
impl UnwindSafe for Builder<'_> {}
impl RefUnwindSafe for Builder<'_> {}
83
/// The target a counter observes, as selected on a [`Builder`].
///
/// This is translated into the `pid` and `flags` arguments of
/// `perf_event_open` by `EventPid::as_args`.
#[derive(Clone, Debug)]
enum EventPid<'a> {
    /// Monitor the calling process.
    ThisProcess,

    /// Monitor the given pid.
    Other(pid_t),

    /// Monitor members of the given cgroup.
    ///
    /// The file is only borrowed, so it must outlive the builder.
    CGroup(&'a File),

    /// Monitor any process on some given CPU.
    Any,
}
98
99impl<'a> EventPid<'a> {
100    // Return the `pid` arg and the `flags` bits representing `self`.
101    fn as_args(&self) -> (pid_t, u32) {
102        match self {
103            EventPid::Any => (-1, 0),
104            EventPid::ThisProcess => (0, 0),
105            EventPid::Other(pid) => (*pid, 0),
106            EventPid::CGroup(file) => (file.as_raw_fd(), sys::bindings::PERF_FLAG_PID_CGROUP),
107        }
108    }
109}
110
111// Methods that actually do work on the builder and aren't just setting
112// config values.
113impl<'a> Builder<'a> {
114    /// Return a new `Builder`, with all parameters set to their defaults.
115    ///
116    /// Return a new `Builder` for the specified event.
117    pub fn new<E: Event>(event: E) -> Self {
118        let mut attrs = perf_event_attr::default();
119
120        // Do the update_attrs bit before we set any of the default state so
121        // that user code can't break configuration we really care about.
122        let data = event.update_attrs_with_data(&mut attrs);
123
124        // Setting `size` accurately will not prevent the code from working
125        // on older kernels. The module comments for `perf_event_open_sys`
126        // explain why in far too much detail.
127        attrs.size = std::mem::size_of::<perf_event_attr>() as u32;
128
129        let mut builder = Self {
130            attrs,
131            who: EventPid::ThisProcess,
132            cpu: None,
133            event_data: data,
134        };
135
136        builder.enabled(false);
137        builder.exclude_kernel(true);
138        builder.exclude_hv(true);
139        builder.read_format(ReadFormat::TOTAL_TIME_ENABLED | ReadFormat::TOTAL_TIME_RUNNING);
140        builder
141    }
142
143    /// Override the event configured for this builder.
144    ///
145    /// This can be used reuse other configuration for the builder (which
146    /// processes/CPUs to observe, sampling fields, read format, etc.) while
147    /// building counters for different events.
148    ///
149    /// Before configuring the [`perf_event_attr`] struct using the event all
150    /// config fields within it will be set to 0. Specifically: `type_`,
151    /// `config`, `config1`, `config2`, and `config3`. This ensures that most
152    /// events do not have to worry about resetting state that other events may
153    /// have set.
154    pub fn event<E: Event>(&mut self, event: E) -> &mut Self {
155        // Reset existing config fields before updating for the event. This ensures
156        // that left over config values for the previous event don't cause
157        // an error.
158        self.attrs.type_ = bindings::PERF_TYPE_HARDWARE;
159        self.attrs.config = 0;
160        self.attrs.config1 = 0;
161        self.attrs.config2 = 0;
162        self.attrs.config3 = 0;
163
164        self.event_data = event.update_attrs_with_data(&mut self.attrs);
165        self
166    }
167
168    /// Construct a [`Counter`] according to the specifications made on this
169    /// `Builder`.
170    ///
171    /// If you want to add this counter to a group use [`build_with_group`]
172    /// instead.
173    ///
174    /// By default, a newly built [`Counter`] is disabled. To begin counting
175    /// events, you must call [`enable`] on the [`Counter`] or the [`Group`]
176    /// to which it belongs. Alternatively, certain options (e.g.
177    /// [`enable_on_exec`]) may be used to automatically enable the [`Counter`]
178    /// once certain events occur.
179    ///
180    /// [`build_with_group`]: Self::build_with_group
181    ///
182    /// # Errors
183    /// - The `perf_event_open` syscall has a large number of different errors
184    ///   it can return. See the [man page][0] for details. Unfortunately, the
185    ///   errors returned by the kernel are not always helpful.
186    /// - This method translates `E2BIG` errors (which means the kernel did not
187    ///   support some options) into a custom [`std::io::Error`] with kind
188    ///   [`ErrorKind::Unsupported`] and an internal error of
189    ///   [`UnsupportedOptionsError`]. This allows you to access the size of the
190    ///   [`perf_event_attr`] struct that the kernel was expecting.
191    ///
192    /// # Panics
193    /// This method panics if `attrs.size` has been set to a value larger than
194    /// the size of the [`perf_event_attr`] struct.
195    ///
196    /// [`Group`]: crate::Group
197    /// [`Group::add`]: crate::Group::add
198    /// [`enable`]: crate::Counter::enable
199    /// [`enable_on_exec`]: Builder::enable_on_exec
200    /// [0]: https://www.mankier.com/2/perf_event_open
201    pub fn build(&self) -> std::io::Result<Counter> {
202        Counter::new_internal(self.build_impl(None)?, ParseConfig::from(self.attrs))
203    }
204
205    /// Construct a [`Counter`] as part of a group.
206    ///
207    /// The `group` passed in must be the leader of the group you to add the
208    /// resulting [`Counter`] to.
209    ///
210    /// ## Notes
211    /// - The group leader does not have to be a [`Group`] (although it can be),
212    ///   any [`Counter`] will work just fine as a group leader provided it is
213    ///   not already within a group itself.
214    /// - Similarly with enabling, disabling, or resetting all counters in the
215    ///   group. Any counter in the group can do those via [`enable_group`],
216    ///   [`disable_group`], and [`reset_group`].
217    /// - The same applies for reading group values. Any counter that has
218    ///   [`ReadFormat::GROUP`] set in [`read_format`](Self::read_format)can
219    ///   read the counter values for the entire group using [`read_group`].
220    ///
221    /// Note, however, that [`Group`] is likely to be more convenient if you
222    /// don't want to set [`ReadFormat::GROUP`] on any of the counters
223    /// within the group.
224    ///
225    /// [`enable_group`]: crate::Counter::enable_group
226    /// [`disable_group`]: crate::Counter::disable_group
227    /// [`reset_group`]: crate::Counter::reset_group
228    /// [`read_group`]: crate::Counter::read_group
229    /// [`ReadFormat::GROUP`]: crate::ReadFormat::GROUP
230    ///
231    /// # Errors
232    /// - The `perf_event_open` syscall has a large number of different errors
233    ///   it can return. See the [man page][0] for details. Unfortunately, the
234    ///   errors returned by the kernel are not always helpful.
235    /// - This method translates `E2BIG` errors (which means the kernel did not
236    ///   support some options) into a custom [`std::io::Error`] with kind
237    ///   [`ErrorKind::Unsupported`] and an internal error of
238    ///   [`UnsupportedOptionsError`]. This allows you to access the size of the
239    ///   [`perf_event_attr`] struct that the kernel was expecting.
240    ///
241    /// [0]: https://www.mankier.com/2/perf_event_open
242    ///
243    /// # Panics
244    /// This method panics if `attrs.size` has been set to a value larger than
245    /// the size of the [`perf_event_attr`] struct.
246    pub fn build_with_group(&self, mut group: impl AsMut<Counter>) -> io::Result<Counter> {
247        let group: &mut Counter = group.as_mut();
248        let file = self.build_impl(Some(group.as_raw_fd()))?;
249
250        group.member_count = group
251            .member_count
252            .checked_add(1)
253            .expect("cannot add more than u32::MAX elements to a group");
254
255        Counter::new_internal(file, ParseConfig::from(self.attrs))
256    }
257
258    /// Build a [`Group`] according to the specifications made on this
259    /// `Builder`.
260    ///
261    /// Note that you will need to have set [`ReadFormat::GROUP`] within
262    /// [`read_format`] to or this method will error.
263    ///
264    /// [`read_format`]: Self::read_format
265    ///
266    /// # Notes
267    /// - A [`Group`] is just a wrapper around a [`Counter`] whose methods use
268    ///   the corresponding `*_group` methods on [`Counter`].
269    /// - The [`GroupData`] returned from [`Group::read`] doesn't include the
270    ///   group itself when being iterated over. You will likely want to use the
271    ///   [`Software::DUMMY`] event when constructing a group.
272    ///
273    /// # Errors
274    /// - All errors that can be returned from [`build`](Self::build).
275    /// - An error will be returned if [`ReadFormat::GROUP`] is not set within
276    ///   `read_format`. It will have a kind of [`ErrorKind::Other`].
277    ///
278    /// # Panics
279    /// This method panics if `attrs.size` has been set to a value larger than
280    /// the size of the [`perf_event_attr`] struct.
281    ///
282    /// [`GroupData`]: crate::GroupData
283    /// [`Software::DUMMY`]: crate::events::Software::DUMMY
284    pub fn build_group(&self) -> io::Result<Group> {
285        let read_format = ReadFormat::from_bits_retain(self.attrs.read_format);
286        if !read_format.contains(ReadFormat::GROUP) {
287            return Err(io::Error::new(
288                ErrorKind::Other,
289                "groups must be created with the GROUP flag enabled",
290            ));
291        }
292
293        Ok(Group(self.build()?))
294    }
295
296    pub(crate) fn build_impl(&self, group_fd: Option<RawFd>) -> io::Result<File> {
297        // Users of this crate can modify attrs.size (e.g. to use it for feature
298        // detection) but in order for the perf_event_open call to be safe it
299        // must not exceed the size of perf_event_attr.
300        assert!(self.attrs.size <= std::mem::size_of::<perf_event_attr>() as u32);
301
302        let cpu = match self.cpu {
303            Some(cpu) => cpu as c_int,
304            None => -1,
305        };
306
307        let (pid, flags) = self.who.as_args();
308        let group_fd = group_fd.unwrap_or(-1);
309
310        // Enable CLOEXEC by default. This the behaviour that the rust stdlib
311        // uses for all its file descriptors.
312        //
313        // If you need to get a perf_event_open fd which does not have CLOEXEC
314        // set then you can modify the flags after the fact with fcntl(2).
315        let flags = flags | sys::bindings::PERF_FLAG_FD_CLOEXEC;
316
317        let mut attrs = self.attrs;
318
319        let result = check_errno_syscall(|| unsafe {
320            sys::perf_event_open(&mut attrs, pid, cpu, group_fd, flags as c_ulong)
321        });
322
323        match result {
324            Ok(fd) => unsafe { Ok(File::from_raw_fd(fd)) },
325            // In case of an E2BIG error we return a custom error so that users
326            // can get at the size expected by the kernel if they want to.
327            Err(e) if e.raw_os_error() == Some(libc::E2BIG) => Err(std::io::Error::new(
328                ErrorKind::Unsupported,
329                UnsupportedOptionsError::new(attrs.size),
330            )),
331            Err(e) => Err(e),
332        }
333    }
334}
335
336impl<'a> Builder<'a> {
337    /// Directly access the [`perf_event_attr`] within this builder.
338    pub fn attrs(&self) -> &perf_event_attr {
339        &self.attrs
340    }
341
342    /// Directly access the [`perf_event_attr`] within this builder.
343    pub fn attrs_mut(&mut self) -> &mut perf_event_attr {
344        &mut self.attrs
345    }
346
347    /// Observe the calling process. (This is the default.)
348    pub fn observe_self(&mut self) -> &mut Self {
349        self.who = EventPid::ThisProcess;
350        self
351    }
352
353    /// Observe the process with the given process id. This requires
354    /// [`CAP_SYS_PTRACE`][man-capabilities] capabilities.
355    ///
356    /// [man-capabilities]: https://www.mankier.com/7/capabilities
357    pub fn observe_pid(&mut self, pid: pid_t) -> &mut Self {
358        self.who = EventPid::Other(pid);
359        self
360    }
361
362    /// Observe all processes.
363    ///
364    /// Linux does not support observing all processes on all CPUs without
365    /// restriction, so combining `any_pid` with [`any_cpu`] will cause the
366    /// final [`build`] to return an error. This must be used together with
367    /// [`one_cpu`], to select a specific CPU to observe.
368    ///
369    /// This requires [`CAP_PERFMON`][cap] or [`CAP_SYS_ADMIN`][cap]
370    /// capabilities, or a `/proc/sys/kernel/perf_event_paranoid` value of less
371    /// than 1.
372    ///
373    /// [`any_cpu`]: Builder::any_cpu
374    /// [`build`]: Builder::build
375    /// [`one_cpu`]: Builder::one_cpu
376    /// [cap]: https://www.mankier.com/7/capabilities
377    pub fn any_pid(&mut self) -> &mut Self {
378        self.who = EventPid::Any;
379        self
380    }
381
382    /// Observe code running in the given [cgroup][man-cgroups] (container). The
383    /// `cgroup` argument should be a `File` referring to the cgroup's directory
384    /// in the cgroupfs filesystem.
385    ///
386    /// [man-cgroups]: https://www.mankier.com/7/cgroups
387    pub fn observe_cgroup(&mut self, cgroup: &'a File) -> &mut Self {
388        self.who = EventPid::CGroup(cgroup);
389        self
390    }
391
392    /// Observe only code running on the given CPU core.
393    pub fn one_cpu(&mut self, cpu: usize) -> &mut Self {
394        self.cpu = Some(cpu);
395        self
396    }
397
398    /// Observe code running on any CPU core. (This is the default.)
399    ///
400    /// Linux does not support observing all processes on all CPUs without
401    /// restriction, so combining `any_cpu` with [`any_pid`] will cause
402    /// [`build`] to return an error. This must be used with [`observe_self`]
403    /// (the default), [`observe_pid`], or [`observe_cgroup`].
404    ///
405    /// [`any_pid`]: Builder::any_pid
406    /// [`build`]: Builder::build
407    /// [`observe_self`]: Builder::observe_self
408    /// [`observe_pid`]: Builder::observe_pid
409    /// [`observe_cgroup`]: Builder::observe_cgroup
410    pub fn any_cpu(&mut self) -> &mut Self {
411        self.cpu = None;
412        self
413    }
414
415    /// Indicate additional values to include in the generated sample events.
416    ///
417    /// Note that this method is additive and does not remove previously added
418    /// sample types. See the documentation of [`SampleFlag`] or the [manpage]
419    /// for what's available to be collected.
420    ///
421    /// # Example
422    /// Here we build a sampler that grabs the instruction pointer, process ID,
423    /// thread ID, and timestamp whenever the underlying event triggers a
424    /// sampling.
425    /// ```
426    /// # use perf_event::{Builder, SampleFlag};
427    /// # use perf_event::events::Hardware;
428    /// let mut sampler = Builder::new(Hardware::INSTRUCTIONS)
429    ///     .sample(SampleFlag::IP)
430    ///     .sample(SampleFlag::TID)
431    ///     .sample(SampleFlag::TIME)
432    ///     .build()?
433    ///     .sampled(8192)?;
434    /// # Ok::<_, std::io::Error>(())
435    /// ```
436    ///
437    /// [`SampleFlag`]: crate::SampleFlag
438    /// [manpage]: https://www.mankier.com/2/perf_event_open
439    pub fn sample(&mut self, sample: SampleFlag) -> &mut Self {
440        self.attrs.sample_type |= sample.bits();
441        self
442    }
443
444    /// Set the fields to include when reading from the counter.
445    ///
446    /// Note that this method is _not_ additive, unlike [`sample`].
447    ///
448    /// The implementation of this library will silently mask out certain flags
449    /// if they would be invalid. For example, we will not allow you to set
450    /// [`ReadFormat::GROUP`] when building a single counter.
451    ///
452    /// [`sample`]: Builder::sample
453    pub fn read_format(&mut self, mut read_format: ReadFormat) -> &mut Self {
454        if read_format.contains(ReadFormat::GROUP) {
455            read_format |= ReadFormat::ID;
456        }
457
458        self.attrs.read_format = read_format.bits();
459        self
460    }
461}
462
463// Section for methods which directly modify attrs. These should correspond
464// roughly 1-to-1 with the entries as documented in the manpage.
465impl<'a> Builder<'a> {
466    /// Whether this counter should start off enabled.
467    ///
468    /// When this is set, the counter will immediately start being recorded as
469    /// soon as it is created.
470    ///
471    /// By default, this is false.
472    pub fn enabled(&mut self, enabled: bool) -> &mut Self {
473        self.attrs.set_disabled((!enabled).into());
474        self
475    }
476
477    /// Set whether this counter is inherited by new threads.
478    ///
479    /// When this flag is set, this counter observes activity in new threads
480    /// created by any thread already being observed.
481    ///
482    /// By default, the flag is unset: counters are not inherited, and observe
483    /// only the threads specified when they are created.
484    ///
485    /// This flag cannot be set if the counter belongs to a `Group`. Doing so
486    /// will result in an error when the counter is built. This is a kernel
487    /// limitation.
488    pub fn inherit(&mut self, inherit: bool) -> &mut Self {
489        self.attrs.set_inherit(inherit.into());
490        self
491    }
492
493    /// Set whether the counter is pinned to the PMU.
494    ///
495    /// If this flag is set, the kernel will attempt to keep the counter on
496    /// always on the CPU if at all possible. If it fails to do so, the counter
497    /// will enter an error state where reading it will always return EOF. For
498    /// this crate, that would result in [`Counter::read`] returning an error
499    /// with kind [`ErrorKind::UnexpectedEof`].
500    ///
501    /// This option only applies to hardware counters and group leaders. At
502    /// this time this crate provides no way to configure group leaders so this
503    /// option will only work when the resulting counter is not in a group.
504    ///
505    /// This is false by default.
506    ///
507    /// [`ErrorKind::UnexpectedEof`]: std::io::ErrorKind::UnexpectedEof
508    pub fn pinned(&mut self, pinned: bool) -> &mut Self {
509        self.attrs.set_pinned(pinned.into());
510        self
511    }
512
513    /// Controls whether the counter or group can be scheduled onto a CPU
514    /// alongside other counters or groups.
515    ///
516    /// This is false by default.
517    pub fn exclusive(&mut self, exclusive: bool) -> &mut Self {
518        self.attrs.set_exclusive(exclusive.into());
519        self
520    }
521
522    /// Whether we should exclude events that occur in user space.
523    ///
524    /// This is false by default.
525    pub fn exclude_user(&mut self, exclude_user: bool) -> &mut Self {
526        self.attrs.set_exclude_user(exclude_user.into());
527        self
528    }
529
530    /// Whether we should exclude events that occur in kernel space.
531    ///
532    /// Note that setting this to false may result in permission errors if
533    /// the current `perf_event_paranoid` value is greater than 1.
534    ///
535    /// This is true by default.
536    pub fn exclude_kernel(&mut self, exclude_kernel: bool) -> &mut Self {
537        self.attrs.set_exclude_kernel(exclude_kernel.into());
538        self
539    }
540
541    /// Include kernel code.
542    ///
543    /// See [`exclude_kernel`](Builder::exclude_kernel).
544    pub fn include_kernel(&mut self) -> &mut Self {
545        self.exclude_kernel(false)
546    }
547
548    /// Whether we should exclude events that happen in the hypervisor.
549    ///
550    /// This is not supported on all architectures as it required built-in
551    /// support within the CPU itself.
552    ///
553    /// Note that setting this to false may result in permission errors if
554    /// the current `perf_event_paranoid` value is greater than 1.
555    ///
556    /// This is true by default
557    pub fn exclude_hv(&mut self, exclude_hv: bool) -> &mut Self {
558        self.attrs.set_exclude_hv(exclude_hv.into());
559        self
560    }
561
562    /// Include hypervisor code.
563    ///
564    /// See [`exclude_hv`](Builder::exclude_hv).
565    pub fn include_hv(&mut self) -> &mut Self {
566        self.exclude_hv(false)
567    }
568
569    /// Whether to exclude events that occur when running the idle task.
570    ///
571    /// Note that this only has an effect for software events.
572    pub fn exclude_idle(&mut self, exclude_idle: bool) -> &mut Self {
573        self.attrs.set_exclude_idle(exclude_idle.into());
574        self
575    }
576
577    /// Enable the generation of MMAP records for executable memory maps.
578    ///
579    /// MMAP records are emitted when the process/thread that is being
580    /// observed creates a new executable memory mapping.
581    pub fn mmap(&mut self, mmap: bool) -> &mut Self {
582        self.attrs.set_mmap(mmap.into());
583        self
584    }
585
586    /// Enable the tracking of process command name changes.
587    ///
588    /// This can happen when a process calls `execve(2)`, `prctl(PR_SET_NAME)`,
589    /// or writes to `/proc/self/comm`.
590    ///
591    /// If you also set the [`comm_exec`](Builder::comm_exec) flag, then the
592    /// kernel will indicate which of these process name changes were due to
593    /// calls to `execve(2)`.
594    pub fn comm(&mut self, comm: bool) -> &mut Self {
595        self.attrs.set_comm(comm.into());
596        self
597    }
598
599    /// Set the period at which the kernel will generate sample events.
600    ///
601    /// As an example, if the event is `Hardware::INSTRUCTIONS` and `period`
602    /// is 100_000 then every 100_000 instructions the kernel will generate an
603    /// event.
604    ///
605    /// Note that the actual precision at which the sample corresponds to the
606    /// instant and location at which Nth event occurred is controlled by the
607    /// [`precise_ip`] option.
608    ///
609    /// This setting is mutually exclusive with [`sample_frequency`].
610    ///
611    /// [`precise_ip`]: Builder::precise_ip
612    /// [`sample_frequency`]: Builder::sample_frequency
613    pub fn sample_period(&mut self, period: u64) -> &mut Self {
614        self.attrs.set_freq(0);
615        self.attrs.sample_period = period;
616        self
617    }
618
619    /// Set the frequency at which the kernel will generate sample events
620    /// (in Hz).
621    ///
622    /// Note that this is not guaranteed to be exact. The kernel will adjust
623    /// the period to attempt to keep the desired frequency but the rate at
624    /// which events occur varies drastically then samples may not occur at
625    /// the specified frequency.
626    ///
627    /// The amount to which samples correspond to the instant and location at
628    /// which an event occurred is controlled by the [`precise_ip`] option.
629    ///
630    /// This setting is mutually exclusive with [`sample_period`].
631    ///
632    /// [`precise_ip`]: Builder::precise_ip
633    /// [`sample_period`]: Builder::sample_period
634    pub fn sample_frequency(&mut self, frequency: u64) -> &mut Self {
635        self.attrs.set_freq(1);
636        self.attrs.sample_freq = frequency;
637        self
638    }
639
640    /// Save event counts on context switch for inherited tasks.
641    ///
642    /// This option is only meaningful if [`inherit`] is also enabled.
643    ///
644    /// [`inherit`]: Builder::inherit
645    pub fn inherit_stat(&mut self, inherit_stat: bool) -> &mut Self {
646        self.attrs.set_inherit_stat(inherit_stat.into());
647        self
648    }
649
650    /// Enable the counter automatically after a call to `execve(2)`.
651    pub fn enable_on_exec(&mut self, enable_on_exec: bool) -> &mut Self {
652        self.attrs.set_enable_on_exec(enable_on_exec.into());
653        self
654    }
655
656    /// If set, then the kernel will generate fork and exit records.
657    pub fn task(&mut self, task: bool) -> &mut Self {
658        self.attrs.set_task(task.into());
659        self
660    }
661
662    /// Set how many bytes will be written before the kernel sends an overflow
663    /// notification.
664    ///
665    /// This controls how much data will be emitted before
666    /// [`Sampler::next_blocking`] will wake up once blocked.
667    ///
668    /// This setting is mutually exclusive with [`wakeup_events`].
669    ///
670    /// [`wakeup_events`]: Self::wakeup_events
671    /// [`Sampler::next_blocking`]: crate::Sampler::next_blocking
672    pub fn wakeup_watermark(&mut self, watermark: usize) -> &mut Self {
673        self.attrs.set_watermark(1);
674        self.attrs.wakeup_watermark = watermark as _;
675        self
676    }
677
678    /// Set how many samples will be written before the kernel sends an
679    /// overflow notification.
680    ///
681    /// This controls how much data will be emitted before
682    /// [`Sampler::next_blocking`] will wake up once blocked. Note that only
683    /// sample records (`PERF_RECORD_SAMPLE`) count towards the event count.
684    ///
685    /// Some caveats apply, see the [manpage] for the full documentation.
686    ///
687    /// This method is mutually exclusive with [`wakeup_watermark`].
688    ///
689    /// [manpage]: https://www.mankier.com/2/perf_event_open
690    /// [`wakeup_watermark`]: Builder::wakeup_watermark
691    /// [`Sampler::next_blocking`]: crate::Sampler::next_blocking
692    pub fn wakeup_events(&mut self, events: usize) -> &mut Self {
693        self.attrs.set_watermark(0);
694        self.attrs.wakeup_events = events as _;
695        self
696    }
697
698    /// Control how much skid is permitted when recording events.
699    ///
700    /// Skid is the number of instructions that occur between an event occuring
701    /// and a sample being gathered by the kernel. Less skid is better but
702    /// there are hardware limitations around how small the skid can be.
703    ///
704    /// Also see [`SampleSkid`].
705    pub fn precise_ip(&mut self, skid: SampleSkid) -> &mut Self {
706        self.attrs.set_precise_ip(skid as _);
707        self
708    }
709
710    /// Enable the generation of MMAP records for non-executable memory maps.
711    ///
712    /// This is the data counterpart of [`mmap`](Builder::mmap).
713    pub fn mmap_data(&mut self, mmap_data: bool) -> &mut Self {
714        self.attrs.set_mmap_data(mmap_data.into());
715        self
716    }
717
718    /// If enabled, then a subset of the sample fields will additionally be
719    /// included in most non-`PERF_RECORD_SAMPLE` samples.
720    ///
721    /// See the [manpage] for the exact fields that are included and which
722    /// records include the trailer.
723    ///
724    /// [manpage]: https://www.mankier.com/2/perf_event_open
725    pub fn sample_id_all(&mut self, sample_id_all: bool) -> &mut Self {
726        self.attrs.set_sample_id_all(sample_id_all.into());
727        self
728    }
729
730    /// Only collect measurements for events occurring inside a VM instance.
731    ///
732    /// This is only meaningful when profiling from outside the VM instance.
733    ///
734    /// See the [manpage] for more documentation.
735    ///
736    /// [manpage]: https://www.mankier.com/2/perf_event_open
737    pub fn exclude_host(&mut self, exclude_host: bool) -> &mut Self {
738        self.attrs.set_exclude_host(exclude_host.into());
739        self
740    }
741
742    /// Don't collect measurements for events occurring inside a VM instance.
743    ///
744    /// This is only meaningful when profiling from outside the VM instance.
745    ///
746    /// See the [manpage] for more documentation.
747    ///
748    /// [manpage]: https://www.mankier.com/2/perf_event_open
749    pub fn exclude_guest(&mut self, exclude_guest: bool) -> &mut Self {
750        self.attrs.set_exclude_guest(exclude_guest.into());
751        self
752    }
753
754    /// Do not include stack frames in the kernel when gathering callchains as
755    /// a part of recording a sample.
756    pub fn exclude_callchain_kernel(&mut self, exclude_kernel: bool) -> &mut Self {
757        self.attrs
758            .set_exclude_callchain_kernel(exclude_kernel.into());
759        self
760    }
761
762    /// Do not include stack frames from userspace when gathering a callchain
763    /// as a part of recording a sample.
764    pub fn exclude_callchain_user(&mut self, exclude_user: bool) -> &mut Self {
765        self.attrs.set_exclude_callchain_user(exclude_user.into());
766        self
767    }
768
769    /// Generate an extended executable mmap record.
770    ///
771    /// This record has enough info to uniquely identify which instance of a
772    /// shared map it corresponds to. Note that you also need to set the `mmap`
773    /// option for this to work.
774    pub fn mmap2(&mut self, mmap2: bool) -> &mut Self {
775        self.attrs.set_mmap2(mmap2.into());
776        self
777    }
778
779    /// Check whether the kernel will annotate COMM records with the COMM_EXEC
780    /// bit when they occur due to an `execve(2)` call.
781    ///
782    /// This option doesn't actually change the behaviour of the kernel.
783    /// Instead, it is useful for feature detection.
784    pub fn comm_exec(&mut self, comm_exec: bool) -> &mut Self {
785        self.attrs.set_comm_exec(comm_exec.into());
786        self
787    }
788
789    /// Select which linux clock to use for timestamps.
790    ///
791    /// If `clockid` is `None` then the kernel will use an internal timer. This
792    /// timer may not be any of the options for clockid.
793    ///
794    /// See [`Clock`] and the [`clock_getttime(2)`][0] manpage for
795    /// documentation on what the different clock values mean.
796    ///
797    /// [0]: https://www.mankier.com/2/clock_gettime
798    pub fn clockid(&mut self, clockid: impl Into<Option<Clock>>) -> &mut Self {
799        let clockid = clockid.into();
800        self.attrs.set_use_clockid(clockid.is_some().into());
801        self.attrs.clockid = clockid.map(Clock::into_raw).unwrap_or(0);
802        self
803    }
804
805    /// Generate `SWITCH` records when a context switch occurs.
806    ///
807    /// Also enables the generation of `SWITCH_CPU_WIDE` records if profiling
808    /// in cpu-wide mode.
809    pub fn context_switch(&mut self, context_switch: bool) -> &mut Self {
810        self.attrs.set_context_switch(context_switch.into());
811        self
812    }
813
814    /// Generate `NAMESPACES` records when a task enters a new namespace.
815    pub fn namespaces(&mut self, namespaces: bool) -> &mut Self {
816        self.attrs.set_namespaces(namespaces.into());
817        self
818    }
819
820    /// Generate `KSYMBOL` records when kernel symbols are registered or
821    /// unregistered.
822    pub fn ksymbol(&mut self, ksymbol: bool) -> &mut Self {
823        self.attrs.set_ksymbol(ksymbol.into());
824        self
825    }
826
827    /// Generate `BPF_EVENT` records when eBPF programs are loaded or unloaded.
828    pub fn bpf_event(&mut self, bpf_event: bool) -> &mut Self {
829        self.attrs.set_bpf_event(bpf_event.into());
830        self
831    }
832
833    /// Output data for non-aux events to the aux buffer, if supported by the
834    /// hardware.
835    pub fn aux_output(&mut self, aux_output: bool) -> &mut Self {
836        self.attrs.set_aux_output(aux_output.into());
837        self
838    }
839
840    /// Generate `CGROUP` records when a new cgroup is created.
841    pub fn cgroup(&mut self, cgroup: bool) -> &mut Self {
842        self.attrs.set_cgroup(cgroup.into());
843        self
844    }
845
846    /// Generate `TEXT_POKE` records when the kernel text (i.e. code) is
847    /// modified.
848    pub fn text_poke(&mut self, text_poke: bool) -> &mut Self {
849        self.attrs.set_text_poke(text_poke.into());
850        self
851    }
852
853    /// Whether to include the build id in `MMAP2` events.
854    pub fn build_id(&mut self, build_id: bool) -> &mut Self {
855        self.attrs.set_build_id(build_id.into());
856        self
857    }
858
859    /// Only inherit the counter to new threads in the same process, not to
860    /// other processes.
861    pub fn inherit_thread(&mut self, inherit_thread: bool) -> &mut Self {
862        self.attrs.set_inherit_thread(inherit_thread.into());
863        self
864    }
865
866    /// Disable this counter when it successfully calls `execve(2)`.
867    pub fn remove_on_exec(&mut self, remove_on_exec: bool) -> &mut Self {
868        self.attrs.set_remove_on_exec(remove_on_exec.into());
869        self
870    }
871
872    /// Synchronously send `SIGTRAP` to the process that created the counter
873    /// when the sampled events overflow.
874    pub fn sigtrap(&mut self, sigtrap: bool) -> &mut Self {
875        self.attrs.set_sigtrap(sigtrap.into());
876        self
877    }
878
879    /// Copy data to the user's signal handler (via `si_perf` in `siginfo_t`).
880    ///
881    /// This can be used to figure out which event caused the signal to be sent.
882    /// It does nothing unless [`sigtrap`](Self::sigtrap) is also set to `true`.
883    pub fn sig_data(&mut self, sig_data: u64) -> &mut Self {
884        self.attrs.sig_data = sig_data;
885        self
886    }
887
888    /// Specify which branches to include in the branch record.
889    ///
890    /// This does nothing unless [`SampleFlag::BRANCH_STACK`] is specified in
891    /// the sample flags.
892    pub fn branch_sample_type(&mut self, flags: SampleBranchFlag) -> &mut Self {
893        self.attrs.branch_sample_type = flags.bits();
894        self
895    }
896
897    /// Specify which CPU registers to dump in a sample.
898    ///
899    /// This does nothing unless [`SampleFlag::REGS_USER`] is part of the
900    /// specified [`sample`](Builder::sample) flags.
901    ///
902    /// The actual layout of the register mask is architecture specific.
903    /// You will generally want the `PERF_REG_<arch>` constants in
904    /// [`perf_event_open_sys`]. (e.g. `PERF_REG_X86_SP`).
905    pub fn sample_regs_user(&mut self, regs: u64) -> &mut Self {
906        self.attrs.sample_regs_user = regs;
907        self
908    }
909
910    /// Specify which CPU registers to dump in a sample.
911    ///
912    /// This does nothing unless [`SampleFlag::REGS_INTR`] is part of the
913    /// specified [`sample`](Builder::sample) flags.
914    ///
915    /// The actual layout of the register mask is architecture specific.
916    /// You will generally want the `PERF_REG_<arch>` constants in
917    /// [`perf_event_open_sys`]. (e.g. `PERF_REG_X86_SP`).
918    pub fn sample_regs_intr(&mut self, regs: u64) -> &mut Self {
919        self.attrs.sample_regs_user = regs;
920        self
921    }
922
923    /// Specify the maximum size of the user stack to dump.
924    ///
925    /// This option does nothing unless [`SampleFlag::STACK_USER`] is set in the
926    /// sample flags.
927    ///
928    /// Note that the size of the array allocated within the sample record will
929    /// always be exactly this size, even if the actual collected stack data is
930    /// much smaller. The allocated sample buffer (when constructing a
931    /// [`Sampler`]) will need to be large enough to accommodate the chosen
932    /// stack size or else samples will be lost.
933    ///
934    /// [`Sampler`]: crate::Sampler
935    pub fn sample_stack_user(&mut self, stack: u32) -> &mut Self {
936        self.attrs.sample_stack_user = stack;
937        self
938    }
939
940    /// Specify the maximum number of stack frames to include when unwinding the
941    /// user stack.
942    ///
943    /// This does nothing unless [`SampleFlag::CALLCHAIN`] is set in the sample
944    /// flags.
945    ///
946    /// Note that the kernel has a user configurable limit specified at
947    /// `/proc/sys/kernel/perf_event_max_stack`. Setting `sample_max_stack` to
948    /// larger than that limit will result in an `EOVERFLOW` error when building
949    /// the counter.
950    pub fn sample_max_stack(&mut self, max_stack: u16) -> &mut Self {
951        self.attrs.sample_max_stack = max_stack;
952        self
953    }
954
955    /// Specify how much data is required before the kernel emits an AUX record.
956    pub fn aux_watermark(&mut self, watermark: u32) -> &mut Self {
957        self.attrs.aux_watermark = watermark;
958        self
959    }
960
961    /// Specify the desired size of AUX data.
962    ///
963    /// This does nothing unless [`SampleFlag::AUX`] is set in the sample flags.
964    /// Note that the emitted aux data can be smaller than the requested size.
965    pub fn aux_sample_size(&mut self, sample_size: u32) -> &mut Self {
966        self.attrs.aux_sample_size = sample_size;
967        self
968    }
969}
970
971impl fmt::Debug for Builder<'_> {
972    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
973        f.debug_struct("Builder")
974            .field("attrs", &self.attrs)
975            .field("who", &self.who)
976            .field("cpu", &self.cpu)
977            .field(
978                "event_data",
979                &self.event_data.as_ref().map(|_| "<dyn EventData>"),
980            )
981            .finish()
982    }
983}
984
/// Error for a counter that was configured with options the running kernel
/// does not support.
///
/// [`Builder::build`] and [`Group::add`] return this as the inner error when
/// the kernel indicates that the [`perf_event_attr`] arguments contained
/// options that the current kernel does not support.
///
/// It makes feature detection possible: on this error a caller can fall back
/// to a config which uses fewer options.
///
/// [`Group::add`]: crate::Group::add
///
/// # Example
/// ```
/// use perf_event::events::Software;
/// use perf_event::{Builder, UnsupportedOptionsError};
///
/// let mut builder = Builder::new(Software::DUMMY);
///
/// // The linux kernel will always return E2BIG when the size is less than
/// // PERF_ATTR_SIZE_VER0 (64) except if it is 0. This allows us to easily
/// // make an invalid call to figure out what size the kernel is expecting.
/// builder.attrs_mut().size = 1;
///
/// let error = builder.build().unwrap_err();
///
/// assert_eq!(error.kind(), std::io::ErrorKind::Unsupported);
/// assert_eq!(error.raw_os_error(), None);
///
/// let inner: &UnsupportedOptionsError = error.get_ref().unwrap().downcast_ref().unwrap();
///
/// println!("The expected size was {}", inner.expected_size());
/// ```
#[derive(Debug)]
pub struct UnsupportedOptionsError {
    // Size the kernel expected the `perf_event_attr` struct to be.
    expected_size: u32,
}
1022
1023impl UnsupportedOptionsError {
1024    pub(crate) fn new(expected_size: u32) -> Self {
1025        Self { expected_size }
1026    }
1027
1028    /// The size that the kernel expected the [`perf_event_attr`] struct to be.
1029    pub fn expected_size(&self) -> usize {
1030        self.expected_size as _
1031    }
1032}
1033
1034impl fmt::Display for UnsupportedOptionsError {
1035    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1036        f.write_str("perf_event_attr contained options not valid for the current kernel")
1037    }
1038}
1039
1040impl std::error::Error for UnsupportedOptionsError {}