darwin_kperf/sampler/mod.rs
1//! Hardware performance counter sampling.
2//!
3//! # Workflow
4//!
5//! 1. Create a [`Sampler`] to load the frameworks, detect the CPU, and force-acquire all hardware
6//! counters.
7//! 2. Call [`Sampler::thread`] with an array of [`Event`]s to create a [`ThreadSampler`] bound to
8//! the calling thread.
9//! 3. Use [`start`](ThreadSampler::start), [`sample`](ThreadSampler::sample), and
10//! [`stop`](ThreadSampler::stop) to toggle counting and read raw values.
11//! 4. Compute deltas between successive samples to get per-region counts.
12//!
13//! Both types clean up on drop: [`ThreadSampler`] stops counting and frees
14//! its config; [`Sampler`] restores the previous `force_all_ctrs` state and
15//! frees the database.
16
17pub(crate) mod error;
18mod ll;
19mod thread;
20
21use core::{ffi::c_int, fmt, ptr::NonNull};
22
23use darwin_kperf_sys::kperfdata::kpep_db;
24
25use self::error::try_kpc;
26pub use self::{error::SamplerError, thread::ThreadSampler};
27use crate::{
28 database::Database,
29 event::{Cpu, Event},
30 framework::{KPerf, KPerfData},
31};
32
/// Session-scoped handle for hardware performance counters.
///
/// When you create a `Sampler`, it loads Apple's private `kperf.framework` and
/// `kperfdata.framework`, detects which CPU you're running on, opens the
/// corresponding PMC event database, and force-acquires all hardware counters.
///
/// "Force-acquiring" means taking control of the counters that macOS normally
/// reserves for the OS Power Manager. Without this step, you can only access
/// a subset of the available counters. The previous force-all state is saved
/// at construction and restored when the `Sampler` is dropped, so other tools
/// that rely on those counters (like `powermetrics`) are only affected while
/// the `Sampler` is alive.
///
/// You should typically create one `Sampler` per process. Creating multiple
/// `Sampler`s is safe but will interfere with the save/restore of the
/// force-all-counters state, since each one independently saves and restores
/// the `sysctl` value on drop.
///
/// A `Sampler` is [`Send`] + [`Sync`] because all of its operations are
/// stateless `sysctl` calls. Use [`thread`](Self::thread) to create per-thread
/// [`ThreadSampler`]s that read the actual counter values.
pub struct Sampler {
    /// Loaded `kperf.framework` handle; its vtable provides the `kpc_*` calls.
    kperf: KPerf,
    /// Loaded `kperfdata.framework` handle.
    kperfdata: KPerfData,
    /// PMC event database for the detected CPU, allocated by `kpep_db_create`
    /// and kept valid for the lifetime of the `Sampler`.
    db: NonNull<kpep_db>,
    /// The CPU generation detected at construction.
    cpu: Cpu,
    /// The `force_all_ctrs` value saved at construction; written back by
    /// [`release`](Self::release).
    saved_force_all: c_int,
}
61
// SAFETY: the raw `kpep_db` pointer is only accessed behind `&self` after
// construction, and the framework vtable calls are stateless sysctl
// operations. The `kpep_db` itself is not modified after `kpep_db_create`,
// so moving the `Sampler` to another thread cannot introduce a data race.
// NOTE(review): this argument depends on the low-level helpers in `ll` never
// mutating the database through the pointer — re-check when they change.
unsafe impl Send for Sampler {}
66
// SAFETY: all `&self` methods on Sampler only read from the db and dispatch
// through vtable function pointers (stateless sysctl wrappers). No interior
// mutability, so shared references may be used from several threads at once.
// NOTE(review): `release` writes a sysctl value through `&self`; that is
// kernel-side state rather than memory owned by this struct — confirm this
// is acceptable for concurrent callers.
unsafe impl Sync for Sampler {}
71
72impl fmt::Debug for Sampler {
73 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
74 fmt.debug_struct("Sampler")
75 .field("cpu", &self.cpu)
76 .finish_non_exhaustive()
77 }
78}
79
80impl Sampler {
81 /// Creates a new sampler for the current CPU.
82 ///
83 /// # Errors
84 ///
85 /// Returns [`SamplerError`] if a framework fails to load, the CPU is
86 /// unrecognized, or counter acquisition fails (typically due to missing
87 /// root privileges).
88 pub fn new() -> Result<Self, SamplerError> {
89 self::ll::ll_init()
90 }
91
92 /// Releases force-acquired counters back to the OS Power Manager.
93 ///
94 /// Restores the `force_all_ctrs` state saved at construction. This performs
95 /// the same teardown step as [`Drop`], but without freeing the database.
96 /// Intended for `static` samplers that outlive the measurement phase (e.g.
97 /// a Criterion harness) and need to relinquish counters before process
98 /// exit.
99 ///
100 /// # Safety
101 ///
102 /// The caller must ensure no [`ThreadSampler`] created from this `Sampler`
103 /// is currently running. Releasing counters while a `ThreadSampler` is
104 /// actively counting produces undefined hardware counter behavior.
105 ///
106 /// # Errors
107 ///
108 /// Returns [`SamplerError::FailedToForceAllCounters`] if the kernel
109 /// rejects the sysctl write.
110 pub unsafe fn release(&self) -> Result<(), SamplerError> {
111 let kpc_vt = self.kperf.vtable();
112
113 // SAFETY: restores the force_all_ctrs value saved at init.
114 let result = unsafe { (kpc_vt.kpc_force_all_ctrs_set)(self.saved_force_all) };
115
116 try_kpc(result, SamplerError::FailedToForceAllCounters)
117 }
118
119 /// The loaded `kperf.framework` handle.
120 #[must_use]
121 pub const fn kperf(&self) -> &KPerf {
122 &self.kperf
123 }
124
125 /// The loaded `kperfdata.framework` handle.
126 #[must_use]
127 pub const fn kperfdata(&self) -> &KPerfData {
128 &self.kperfdata
129 }
130
131 /// The detected CPU generation.
132 #[must_use]
133 pub const fn cpu(&self) -> Cpu {
134 self.cpu
135 }
136
137 /// A safe view of the PMC event database for the detected CPU.
138 #[must_use]
139 pub const fn database(&self) -> Database<'_> {
140 // SAFETY: db was allocated by kpep_db_create and remains valid for
141 // the lifetime of the Sampler.
142 unsafe { Database::from_raw(&*self.db.as_ptr()) }
143 }
144
145 /// Creates a [`ThreadSampler`] configured for the given events.
146 ///
147 /// The returned `ThreadSampler` is `!Send + !Sync` and must be used on
148 /// the thread that created it.
149 ///
150 /// # Errors
151 ///
152 /// Returns [`SamplerError`] if any event is unavailable on the current CPU
153 /// or if counter programming fails.
154 pub fn thread<const N: usize>(
155 &self,
156 events: [Event; N],
157 ) -> Result<ThreadSampler<'_, N>, SamplerError> {
158 self::ll::ll_configure(self, events)
159 }
160}
161
162impl Drop for Sampler {
163 fn drop(&mut self) {
164 self::ll::ll_drop(self);
165 }
166}