mod_alloc/lib.rs
1//! # mod-alloc
2//!
3//! Allocation profiling for Rust. Tracks allocation counts, total
4//! bytes, peak resident memory, and current resident memory by
5//! wrapping the system allocator via [`GlobalAlloc`].
6//!
7//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
8//! zero external dependencies on the hot path.
9//!
10//! ## Installing as the global allocator
11//!
12//! ```no_run
13//! use mod_alloc::{ModAlloc, Profiler};
14//!
15//! #[global_allocator]
16//! static GLOBAL: ModAlloc = ModAlloc::new();
17//!
18//! fn main() {
19//! let p = Profiler::start();
20//!
21//! let v: Vec<u64> = (0..1000).collect();
22//! drop(v);
23//!
24//! let stats = p.stop();
25//! println!("Allocations: {}", stats.alloc_count);
26//! println!("Total bytes: {}", stats.total_bytes);
27//! println!("Peak bytes (absolute): {}", stats.peak_bytes);
28//! }
29//! ```
30//!
31//! ## Counter semantics
32//!
33//! The four Tier 1 counters track allocator activity since the
34//! installed [`ModAlloc`] began counting (or since the last
35//! [`ModAlloc::reset`] call):
36//!
37//! | Counter | Updated on `alloc` | Updated on `dealloc` |
38//! |-----------------|-------------------------------|----------------------|
39//! | `alloc_count` | `+= 1` | (unchanged) |
40//! | `total_bytes` | `+= size` | (unchanged) |
41//! | `current_bytes` | `+= size` | `-= size` |
42//! | `peak_bytes` | high-water mark of `current` | (unchanged) |
43//!
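//! `realloc` is counted as one allocation event. On a growing realloc
//! `total_bytes` and `current_bytes` increase by the growth delta (and
//! `peak_bytes` is raised if that sets a new high-water mark); on a
//! shrinking realloc `current_bytes` decreases by the difference and
//! `total_bytes` is unchanged.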
47//!
48//! ## Status
49//!
50//! v0.9.0 ships Tier 1 (counters) only. The `backtraces` and
51//! `dhat-compat` cargo features are defined for forward
52//! compatibility but compile as no-ops; Tier 2 (inline backtrace
53//! capture) lands in v0.9.1 and Tier 3 (DHAT-compatible JSON
54//! output) lands in v0.9.3.
55
56#![cfg_attr(docsrs, feature(doc_cfg))]
57#![warn(missing_docs)]
58#![warn(rust_2018_idioms)]
59
60use std::alloc::{GlobalAlloc, Layout, System};
61use std::cell::Cell;
62use std::ptr;
63use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};
64
// Process-wide pointer to the `ModAlloc` whose counters `Profiler`
// reads. It starts out null and is written at most once, by
// `ModAlloc::register_self`, the first time a tracked (non-reentrant)
// `alloc`, `alloc_zeroed`, or `realloc` succeeds, so the user never
// has to make an explicit registration call.
//
// The pointer is published with a `Release` compare-exchange and
// consumed with an `Acquire` load in `current_snapshot_or_zeros`.
static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());
70
thread_local! {
    // Per-thread marker that is `true` while the thread is inside the
    // counter-update path. An allocation that happens while it is set
    // is a nested call: it skips tracking and goes straight to the
    // System allocator.
    //
    // The `const` initializer (stable since 1.59) means reading the
    // flag never needs lazy construction, so the TLS access itself
    // cannot allocate.
    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
}

// Scope guard around the reentrancy flag. Holding a `ReentryGuard`
// means "this thread owns the tracking path"; dropping it lowers the
// flag again.
struct ReentryGuard;

impl ReentryGuard {
    // Try to claim the tracking path for the current thread.
    //
    // Returns `None` when the thread is already inside a tracked
    // allocation (the caller must skip counter updates) or when the
    // thread-local cannot be accessed (e.g. during thread teardown).
    fn enter() -> Option<Self> {
        // `replace` raises the flag and reports its previous state in
        // one step. If it was already raised, writing `true` again
        // changes nothing and the nested caller is turned away.
        match IN_ALLOC.try_with(|in_alloc| in_alloc.replace(true)) {
            Ok(false) => Some(ReentryGuard),
            Ok(true) | Err(_) => None,
        }
    }
}

impl Drop for ReentryGuard {
    // Lower the flag so the next allocation on this thread is tracked.
    fn drop(&mut self) {
        // If the thread-local is no longer accessible there is nothing
        // left to clear, so the access error is deliberately ignored.
        let _ = IN_ALLOC.try_with(|in_alloc| in_alloc.set(false));
    }
}
108
/// Global allocator wrapper that tracks allocations.
///
/// Install as `#[global_allocator]` to enable tracking. Every
/// allocation, deallocation, reallocation, and zero-initialised
/// allocation is forwarded to [`std::alloc::System`], and the event is
/// recorded in four lock-free [`AtomicU64`] counters that can be read
/// at any time with [`ModAlloc::snapshot`].
///
/// The `Debug` representation prints the current value of each
/// counter.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::ModAlloc;
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let v: Vec<u8> = vec![0; 1024];
///     let stats = GLOBAL.snapshot();
///     assert!(stats.alloc_count >= 1);
///     drop(v);
/// }
/// ```
#[derive(Debug)]
pub struct ModAlloc {
    // Number of recorded allocation events (reallocations included).
    alloc_count: AtomicU64,
    // Cumulative bytes handed out; never decreases except via `reset`.
    total_bytes: AtomicU64,
    // Highest value `current_bytes` has reached.
    peak_bytes: AtomicU64,
    // Bytes recorded as allocated and not yet recorded as freed.
    current_bytes: AtomicU64,
}
137
138impl ModAlloc {
139 /// Construct a new `ModAlloc` allocator wrapper.
140 ///
141 /// All counters start at zero. This function is `const`, which
142 /// allows construction in a `static` for use as
143 /// `#[global_allocator]`.
144 ///
145 /// # Example
146 ///
147 /// ```
148 /// use mod_alloc::ModAlloc;
149 ///
150 /// static GLOBAL: ModAlloc = ModAlloc::new();
151 /// let stats = GLOBAL.snapshot();
152 /// assert_eq!(stats.alloc_count, 0);
153 /// ```
154 pub const fn new() -> Self {
155 Self {
156 alloc_count: AtomicU64::new(0),
157 total_bytes: AtomicU64::new(0),
158 peak_bytes: AtomicU64::new(0),
159 current_bytes: AtomicU64::new(0),
160 }
161 }
162
163 /// Snapshot the current counter values.
164 ///
165 /// Each counter is read independently with `Relaxed` ordering;
166 /// the resulting [`AllocStats`] is a coherent best-effort view
167 /// but does not represent a single atomic moment in time. For
168 /// scoped measurement, prefer [`Profiler`].
169 ///
170 /// # Example
171 ///
172 /// ```
173 /// use mod_alloc::ModAlloc;
174 ///
175 /// let alloc = ModAlloc::new();
176 /// let stats = alloc.snapshot();
177 /// assert_eq!(stats.alloc_count, 0);
178 /// ```
179 pub fn snapshot(&self) -> AllocStats {
180 AllocStats {
181 alloc_count: self.alloc_count.load(Ordering::Relaxed),
182 total_bytes: self.total_bytes.load(Ordering::Relaxed),
183 peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
184 current_bytes: self.current_bytes.load(Ordering::Relaxed),
185 }
186 }
187
188 /// Reset all counters to zero.
189 ///
190 /// Intended for use at the start of a profile run, before any
191 /// outstanding allocations exist. Calling `reset` while
192 /// allocations are live can cause `current_bytes` to wrap on
193 /// subsequent deallocations; the other counters are unaffected.
194 ///
195 /// # Example
196 ///
197 /// ```
198 /// use mod_alloc::ModAlloc;
199 ///
200 /// let alloc = ModAlloc::new();
201 /// alloc.reset();
202 /// let stats = alloc.snapshot();
203 /// assert_eq!(stats.alloc_count, 0);
204 /// ```
205 pub fn reset(&self) {
206 self.alloc_count.store(0, Ordering::Relaxed);
207 self.total_bytes.store(0, Ordering::Relaxed);
208 self.peak_bytes.store(0, Ordering::Relaxed);
209 self.current_bytes.store(0, Ordering::Relaxed);
210 }
211
212 #[inline]
213 fn record_alloc(&self, size: u64) {
214 self.alloc_count.fetch_add(1, Ordering::Relaxed);
215 self.total_bytes.fetch_add(size, Ordering::Relaxed);
216 let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
217 self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
218 }
219
220 #[inline]
221 fn record_dealloc(&self, size: u64) {
222 self.current_bytes.fetch_sub(size, Ordering::Relaxed);
223 }
224
225 #[inline]
226 fn record_realloc(&self, old_size: u64, new_size: u64) {
227 self.alloc_count.fetch_add(1, Ordering::Relaxed);
228 if new_size > old_size {
229 let delta = new_size - old_size;
230 self.total_bytes.fetch_add(delta, Ordering::Relaxed);
231 let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
232 self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
233 } else if new_size < old_size {
234 self.current_bytes
235 .fetch_sub(old_size - new_size, Ordering::Relaxed);
236 }
237 }
238
239 #[inline]
240 fn register_self(&self) {
241 if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
242 let _ = GLOBAL_HANDLE.compare_exchange(
243 ptr::null_mut(),
244 self as *const ModAlloc as *mut ModAlloc,
245 Ordering::Release,
246 Ordering::Relaxed,
247 );
248 }
249 }
250}
251
252impl Default for ModAlloc {
253 fn default() -> Self {
254 Self::new()
255 }
256}
257
258// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
259// underlying allocation through [`std::alloc::System`]. Each method
260// forwards its arguments unchanged to `System` and only inspects
261// the result; size/alignment invariants required by the
262// `GlobalAlloc` contract are passed through unmodified, so the
263// caller's contract to us becomes our contract to System.
264//
265// The counter-update path uses thread-local reentrancy detection
266// (see `ReentryGuard`) so that any allocation triggered transitively
267// inside the tracking path bypasses tracking and forwards directly
268// to System, preserving the "hook MUST NOT itself allocate"
269// invariant from REPS section 4.
270unsafe impl GlobalAlloc for ModAlloc {
271 unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
272 // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
273 // size; we forward unchanged to `System.alloc`, which has
274 // the same contract.
275 let ptr = unsafe { System.alloc(layout) };
276 if !ptr.is_null() {
277 if let Some(_g) = ReentryGuard::enter() {
278 self.record_alloc(layout.size() as u64);
279 self.register_self();
280 }
281 }
282 ptr
283 }
284
285 unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
286 // SAFETY: same invariants as `alloc`; `layout` forwarded
287 // unchanged. `System.alloc_zeroed` zero-fills the returned
288 // memory, satisfying the `GlobalAlloc::alloc_zeroed`
289 // contract.
290 let ptr = unsafe { System.alloc_zeroed(layout) };
291 if !ptr.is_null() {
292 if let Some(_g) = ReentryGuard::enter() {
293 self.record_alloc(layout.size() as u64);
294 self.register_self();
295 }
296 }
297 ptr
298 }
299
300 unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
301 // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
302 // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
303 // allocator with the given `layout`; we forwarded all of
304 // those to `System` with the same `layout`, so the inverse
305 // pairing for `System.dealloc(ptr, layout)` is valid.
306 unsafe { System.dealloc(ptr, layout) };
307 if let Some(_g) = ReentryGuard::enter() {
308 self.record_dealloc(layout.size() as u64);
309 }
310 }
311
312 unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
313 // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
314 // a prior allocation with `layout`, `new_size` is non-zero,
315 // and the alignment in `layout` remains valid for the new
316 // size. We forward all three to `System.realloc` which has
317 // the same contract.
318 let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
319 if !new_ptr.is_null() {
320 if let Some(_g) = ReentryGuard::enter() {
321 self.record_realloc(layout.size() as u64, new_size as u64);
322 self.register_self();
323 }
324 }
325 new_ptr
326 }
327}
328
/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`]. The
/// `Default` value has every counter at zero.
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// assert_eq!(AllocStats::default().total_bytes, 0);
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AllocStats {
    /// Number of allocations performed. Each reallocation counts as
    /// one allocation.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak allocated bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}
358
359/// Scoped profiler that captures a delta between start and stop.
360///
361/// Read the snapshot of the installed [`ModAlloc`] on construction
362/// and again on [`Profiler::stop`], returning the difference. If no
363/// `ModAlloc` is installed as `#[global_allocator]` and no
364/// allocation has occurred through it yet, both snapshots are
365/// zero and the delta is zero.
366///
367/// # Example
368///
369/// ```no_run
370/// use mod_alloc::{ModAlloc, Profiler};
371///
372/// #[global_allocator]
373/// static GLOBAL: ModAlloc = ModAlloc::new();
374///
375/// fn main() {
376/// let p = Profiler::start();
377/// let v: Vec<u8> = vec![0; 1024];
378/// drop(v);
379/// let stats = p.stop();
380/// println!("Captured {} alloc events", stats.alloc_count);
381/// }
382/// ```
383pub struct Profiler {
384 baseline: AllocStats,
385}
386
387impl Profiler {
388 /// Begin profiling, capturing the current allocation state.
389 ///
390 /// If no `ModAlloc` is installed as `#[global_allocator]` or no
391 /// allocation has occurred yet, the captured baseline is all
392 /// zeros.
393 ///
394 /// # Example
395 ///
396 /// ```
397 /// use mod_alloc::Profiler;
398 ///
399 /// let p = Profiler::start();
400 /// let _delta = p.stop();
401 /// ```
402 pub fn start() -> Self {
403 Self {
404 baseline: current_snapshot_or_zeros(),
405 }
406 }
407
408 /// Stop profiling and return the delta from start.
409 ///
410 /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
411 /// from `start()` to `stop()`. `peak_bytes` is the absolute
412 /// high-water mark observed during the profiling window (peak
413 /// has no meaningful delta semantic).
414 ///
415 /// # Example
416 ///
417 /// ```
418 /// use mod_alloc::Profiler;
419 ///
420 /// let p = Profiler::start();
421 /// let stats = p.stop();
422 /// assert_eq!(stats.alloc_count, 0);
423 /// ```
424 pub fn stop(self) -> AllocStats {
425 let now = current_snapshot_or_zeros();
426 AllocStats {
427 alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
428 total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
429 current_bytes: now
430 .current_bytes
431 .saturating_sub(self.baseline.current_bytes),
432 peak_bytes: now.peak_bytes,
433 }
434 }
435}
436
437fn current_snapshot_or_zeros() -> AllocStats {
438 let p = GLOBAL_HANDLE.load(Ordering::Acquire);
439 if p.is_null() {
440 AllocStats {
441 alloc_count: 0,
442 total_bytes: 0,
443 peak_bytes: 0,
444 current_bytes: 0,
445 }
446 } else {
447 // SAFETY: `GLOBAL_HANDLE` is only ever set by
448 // `ModAlloc::register_self` to point at the address of a
449 // `#[global_allocator] static` (or any other `'static`
450 // `ModAlloc`). That target has `'static` lifetime, so the
451 // pointer remains valid for the remainder of the program.
452 // We produce only a shared borrow used to call `&self`
453 // methods that read atomic counters; no mutation through
454 // the pointer occurs here.
455 unsafe { (*p).snapshot() }
456 }
457}
458
// Unit tests for the counter bookkeeping, the reentrancy guard, and
// the profiler's no-handle path. They drive the private `record_*`
// helpers directly, so no `ModAlloc` is installed as the global
// allocator and `GLOBAL_HANDLE` stays null throughout.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocator_constructs() {
        let _ = ModAlloc::new();
    }

    #[test]
    fn snapshot_returns_zeros_initially() {
        let a = ModAlloc::new();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.peak_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }

    #[test]
    fn reset_works() {
        let a = ModAlloc::new();
        // Dirty every counter first; resetting an untouched allocator
        // would pass even if `reset` did nothing.
        a.record_alloc(64);
        a.record_dealloc(16);
        let before = a.snapshot();
        assert_eq!(before.alloc_count, 1);
        assert_eq!(before.total_bytes, 64);
        assert_eq!(before.peak_bytes, 64);
        assert_eq!(before.current_bytes, 48);

        a.reset();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.peak_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }

    #[test]
    fn record_alloc_updates_counters() {
        let a = ModAlloc::new();
        a.record_alloc(128);
        a.record_alloc(256);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 384);
        assert_eq!(s.current_bytes, 384);
        assert_eq!(s.peak_bytes, 384);
    }

    #[test]
    fn record_dealloc_decreases_current_only() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(400);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 1);
        assert_eq!(s.total_bytes, 1000);
        assert_eq!(s.current_bytes, 600);
        assert_eq!(s.peak_bytes, 1000);
    }

    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.record_realloc(100, 250);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 250);
        assert_eq!(s.current_bytes, 250);
        assert_eq!(s.peak_bytes, 250);
    }

    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let a = ModAlloc::new();
        a.record_alloc(500);
        a.record_realloc(500, 200);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 500);
        assert_eq!(s.current_bytes, 200);
        assert_eq!(s.peak_bytes, 500);
    }

    #[test]
    fn record_realloc_same_size_only_counts_event() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.record_realloc(100, 100);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 100);
        assert_eq!(s.current_bytes, 100);
        assert_eq!(s.peak_bytes, 100);
    }

    #[test]
    fn peak_holds_high_water_mark() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(1000);
        a.record_alloc(500);
        let s = a.snapshot();
        assert_eq!(s.peak_bytes, 1000);
        assert_eq!(s.current_bytes, 500);
    }

    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let outer = ReentryGuard::enter();
        assert!(outer.is_some());
        let inner = ReentryGuard::enter();
        assert!(inner.is_none(), "nested entry must be denied");
        drop(outer);
        let after = ReentryGuard::enter();
        assert!(after.is_some(), "entry must be allowed after outer drops");
    }

    #[test]
    fn profiler_start_stop_with_no_handle() {
        let p = Profiler::start();
        let s = p.stop();
        assert_eq!(s.alloc_count, 0);
    }
}