Skip to main content

mod_alloc/
lib.rs

1//! # mod-alloc
2//!
3//! Allocation profiling for Rust. Tracks allocation counts, total
4//! bytes, peak resident memory, and current resident memory by
5//! wrapping the system allocator via [`GlobalAlloc`].
6//!
7//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
8//! zero external dependencies on the hot path.
9//!
10//! ## Installing as the global allocator
11//!
12//! ```no_run
13//! use mod_alloc::{ModAlloc, Profiler};
14//!
15//! #[global_allocator]
16//! static GLOBAL: ModAlloc = ModAlloc::new();
17//!
18//! fn main() {
19//!     let p = Profiler::start();
20//!
21//!     let v: Vec<u64> = (0..1000).collect();
22//!     drop(v);
23//!
24//!     let stats = p.stop();
25//!     println!("Allocations: {}", stats.alloc_count);
26//!     println!("Total bytes: {}", stats.total_bytes);
27//!     println!("Peak bytes (absolute): {}", stats.peak_bytes);
28//! }
29//! ```
30//!
31//! ## Counter semantics
32//!
33//! The four Tier 1 counters track allocator activity since the
34//! installed [`ModAlloc`] began counting (or since the last
35//! [`ModAlloc::reset`] call):
36//!
37//! | Counter         | Updated on `alloc`            | Updated on `dealloc` |
38//! |-----------------|-------------------------------|----------------------|
39//! | `alloc_count`   | `+= 1`                        | (unchanged)          |
40//! | `total_bytes`   | `+= size`                     | (unchanged)          |
41//! | `current_bytes` | `+= size`                     | `-= size`            |
42//! | `peak_bytes`    | high-water mark of `current`  | (unchanged)          |
43//!
44//! `realloc` is counted as one allocation event. `total_bytes`
45//! increases by the growth delta on a growing realloc and is
46//! unchanged on a shrinking realloc.
47//!
48//! ## Status
49//!
50//! v0.9.1 adds Tier 2 (inline backtrace capture) behind the
51//! `backtraces` feature. Default builds still ship Tier 1
52//! counters only. Tier 3 (DHAT-compatible JSON output) lands in
53//! v0.9.3.
54//!
55//! ## Backtraces (`backtraces` feature)
56//!
57//! With `mod-alloc = { version = "0.9", features = ["backtraces"] }`
58//! and `RUSTFLAGS="-C force-frame-pointers=yes"`, each tracked
59//! allocation captures up to 8 frames of its call site via inline
60//! frame-pointer walking on `x86_64` and `aarch64`. Per-call-site
61//! aggregation is exposed via `ModAlloc::call_sites` (available
62//! only with the `backtraces` feature); the result is raw return
63//! addresses. Symbolication ships in v0.9.2.
64//!
65//! Aggregation-table size is controlled by the `MOD_ALLOC_BUCKETS`
66//! environment variable at process start (default 4,096 buckets,
67//! ~384 KB).
68
69#![cfg_attr(docsrs, feature(doc_cfg))]
70#![warn(missing_docs)]
71#![warn(rust_2018_idioms)]
72
73#[cfg(feature = "backtraces")]
74mod backtrace;
75
76#[cfg(feature = "backtraces")]
77pub use backtrace::CallSiteStats;
78
79use std::alloc::{GlobalAlloc, Layout, System};
80use std::cell::Cell;
81use std::ptr;
82use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};
83
// Process-wide handle to the installed `ModAlloc`. It starts out null
// and is filled in by `ModAlloc::register_self`, which runs on tracked
// (non-reentrant) `alloc`, `alloc_zeroed`, and `realloc` calls. The
// registration is a compare-exchange from null, so the first allocator
// to register wins and the handle is never overwritten afterwards.
// `current_snapshot_or_zeros` (and through it `Profiler`) reads this to
// locate the canonical counters without requiring an explicit
// registration call from the user.
static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());
89
thread_local! {
    // Per-thread marker that is `true` for as long as this thread is
    // inside the tracking path. An allocation requested while it is
    // set is a nested (reentrant) one: it skips tracking and is served
    // by the System allocator alone.
    //
    // The `const { .. }` initializer form (stable since 1.59) avoids
    // any lazy-construction allocation inside the TLS access path.
    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
}

// Scope guard over `IN_ALLOC`. `enter` yields `Some(guard)` only when
// it is the call that flips the flag from `false` to `true`. It yields
// `None` when the flag is already set (the caller is nested inside a
// tracked allocation and must skip counter updates) or when the
// thread-local slot cannot be accessed (e.g. during thread teardown).
// Dropping the guard clears the flag again.
struct ReentryGuard;

impl ReentryGuard {
    fn enter() -> Option<Self> {
        // The closure reports whether this call claimed the flag; the
        // flag is only written when it was previously clear.
        let claimed = IN_ALLOC.try_with(|flag| {
            let already_inside = flag.get();
            if !already_inside {
                flag.set(true);
            }
            !already_inside
        });

        match claimed {
            Ok(true) => Some(ReentryGuard),
            // Already inside the tracking path, or TLS is unavailable.
            Ok(false) | Err(_) => None,
        }
    }
}

impl Drop for ReentryGuard {
    fn drop(&mut self) {
        // Best effort: if the slot is no longer accessible there is
        // nothing left to clear, so an access error is ignored.
        match IN_ALLOC.try_with(|flag| flag.set(false)) {
            Ok(()) | Err(_) => {}
        }
    }
}
127
128/// Global allocator wrapper that tracks allocations.
129///
130/// Install as `#[global_allocator]` to enable tracking. The wrapper
131/// forwards every allocation, deallocation, reallocation, and
132/// zero-initialised allocation to [`std::alloc::System`] and records
133/// the event in four lock-free [`AtomicU64`] counters.
134///
135/// # Example
136///
137/// ```no_run
138/// use mod_alloc::ModAlloc;
139///
140/// #[global_allocator]
141/// static GLOBAL: ModAlloc = ModAlloc::new();
142///
143/// fn main() {
144///     let v: Vec<u8> = vec![0; 1024];
145///     let stats = GLOBAL.snapshot();
146///     assert!(stats.alloc_count >= 1);
147///     drop(v);
148/// }
149/// ```
150pub struct ModAlloc {
151    alloc_count: AtomicU64,
152    total_bytes: AtomicU64,
153    peak_bytes: AtomicU64,
154    current_bytes: AtomicU64,
155}
156
157impl ModAlloc {
158    /// Construct a new `ModAlloc` allocator wrapper.
159    ///
160    /// All counters start at zero. This function is `const`, which
161    /// allows construction in a `static` for use as
162    /// `#[global_allocator]`.
163    ///
164    /// # Example
165    ///
166    /// ```
167    /// use mod_alloc::ModAlloc;
168    ///
169    /// static GLOBAL: ModAlloc = ModAlloc::new();
170    /// let stats = GLOBAL.snapshot();
171    /// assert_eq!(stats.alloc_count, 0);
172    /// ```
173    pub const fn new() -> Self {
174        Self {
175            alloc_count: AtomicU64::new(0),
176            total_bytes: AtomicU64::new(0),
177            peak_bytes: AtomicU64::new(0),
178            current_bytes: AtomicU64::new(0),
179        }
180    }
181
182    /// Snapshot the current counter values.
183    ///
184    /// Each counter is read independently with `Relaxed` ordering;
185    /// the resulting [`AllocStats`] is a coherent best-effort view
186    /// but does not represent a single atomic moment in time. For
187    /// scoped measurement, prefer [`Profiler`].
188    ///
189    /// # Example
190    ///
191    /// ```
192    /// use mod_alloc::ModAlloc;
193    ///
194    /// let alloc = ModAlloc::new();
195    /// let stats = alloc.snapshot();
196    /// assert_eq!(stats.alloc_count, 0);
197    /// ```
198    pub fn snapshot(&self) -> AllocStats {
199        AllocStats {
200            alloc_count: self.alloc_count.load(Ordering::Relaxed),
201            total_bytes: self.total_bytes.load(Ordering::Relaxed),
202            peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
203            current_bytes: self.current_bytes.load(Ordering::Relaxed),
204        }
205    }
206
207    /// Reset all counters to zero.
208    ///
209    /// Intended for use at the start of a profile run, before any
210    /// outstanding allocations exist. Calling `reset` while
211    /// allocations are live can cause `current_bytes` to wrap on
212    /// subsequent deallocations; the other counters are unaffected.
213    ///
214    /// # Example
215    ///
216    /// ```
217    /// use mod_alloc::ModAlloc;
218    ///
219    /// let alloc = ModAlloc::new();
220    /// alloc.reset();
221    /// let stats = alloc.snapshot();
222    /// assert_eq!(stats.alloc_count, 0);
223    /// ```
224    pub fn reset(&self) {
225        self.alloc_count.store(0, Ordering::Relaxed);
226        self.total_bytes.store(0, Ordering::Relaxed);
227        self.peak_bytes.store(0, Ordering::Relaxed);
228        self.current_bytes.store(0, Ordering::Relaxed);
229    }
230
231    #[inline]
232    fn record_alloc(&self, size: u64) {
233        self.alloc_count.fetch_add(1, Ordering::Relaxed);
234        self.total_bytes.fetch_add(size, Ordering::Relaxed);
235        let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
236        self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
237    }
238
239    #[inline]
240    fn record_dealloc(&self, size: u64) {
241        self.current_bytes.fetch_sub(size, Ordering::Relaxed);
242    }
243
244    #[inline]
245    fn record_realloc(&self, old_size: u64, new_size: u64) {
246        self.alloc_count.fetch_add(1, Ordering::Relaxed);
247        if new_size > old_size {
248            let delta = new_size - old_size;
249            self.total_bytes.fetch_add(delta, Ordering::Relaxed);
250            let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
251            self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
252        } else if new_size < old_size {
253            self.current_bytes
254                .fetch_sub(old_size - new_size, Ordering::Relaxed);
255        }
256    }
257
258    #[inline]
259    fn register_self(&self) {
260        if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
261            let _ = GLOBAL_HANDLE.compare_exchange(
262                ptr::null_mut(),
263                self as *const ModAlloc as *mut ModAlloc,
264                Ordering::Release,
265                Ordering::Relaxed,
266            );
267        }
268    }
269
270    /// Drain the per-call-site aggregation table into a `Vec`.
271    ///
272    /// Available only with the `backtraces` cargo feature. The
273    /// returned vector contains one [`CallSiteStats`] per unique
274    /// call site observed since the table was first written. Each
275    /// row carries up to 8 raw return addresses (top of stack
276    /// first), the number of allocations attributed to that site,
277    /// and the total bytes.
278    ///
279    /// Symbolication (resolving addresses to function names)
280    /// lands in `v0.9.2`. This method exposes raw addresses only.
281    ///
282    /// # Example
283    ///
284    /// ```no_run
285    /// # #[cfg(feature = "backtraces")]
286    /// # fn demo() {
287    /// use mod_alloc::ModAlloc;
288    ///
289    /// #[global_allocator]
290    /// static GLOBAL: ModAlloc = ModAlloc::new();
291    ///
292    /// let _v: Vec<u8> = vec![0; 1024];
293    /// for site in GLOBAL.call_sites() {
294    ///     println!("{} allocs, {} bytes at {:#x}",
295    ///         site.count, site.total_bytes, site.frames[0]);
296    /// }
297    /// # }
298    /// ```
299    #[cfg(feature = "backtraces")]
300    pub fn call_sites(&self) -> Vec<CallSiteStats> {
301        backtrace::call_sites_report()
302    }
303}
304
305impl Default for ModAlloc {
306    fn default() -> Self {
307        Self::new()
308    }
309}
310
311// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
312// underlying allocation through [`std::alloc::System`]. Each method
313// forwards its arguments unchanged to `System` and only inspects
314// the result; size/alignment invariants required by the
315// `GlobalAlloc` contract are passed through unmodified, so the
316// caller's contract to us becomes our contract to System.
317//
318// The counter-update path uses thread-local reentrancy detection
319// (see `ReentryGuard`) so that any allocation triggered transitively
320// inside the tracking path bypasses tracking and forwards directly
321// to System, preserving the "hook MUST NOT itself allocate"
322// invariant from REPS section 4.
323unsafe impl GlobalAlloc for ModAlloc {
324    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
325        // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
326        // size; we forward unchanged to `System.alloc`, which has
327        // the same contract.
328        let ptr = unsafe { System.alloc(layout) };
329        if !ptr.is_null() {
330            if let Some(_g) = ReentryGuard::enter() {
331                let size = layout.size() as u64;
332                self.record_alloc(size);
333                self.register_self();
334                #[cfg(feature = "backtraces")]
335                backtrace::record_event(size);
336            }
337        }
338        ptr
339    }
340
341    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
342        // SAFETY: same invariants as `alloc`; `layout` forwarded
343        // unchanged. `System.alloc_zeroed` zero-fills the returned
344        // memory, satisfying the `GlobalAlloc::alloc_zeroed`
345        // contract.
346        let ptr = unsafe { System.alloc_zeroed(layout) };
347        if !ptr.is_null() {
348            if let Some(_g) = ReentryGuard::enter() {
349                let size = layout.size() as u64;
350                self.record_alloc(size);
351                self.register_self();
352                #[cfg(feature = "backtraces")]
353                backtrace::record_event(size);
354            }
355        }
356        ptr
357    }
358
359    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
360        // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
361        // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
362        // allocator with the given `layout`; we forwarded all of
363        // those to `System` with the same `layout`, so the inverse
364        // pairing for `System.dealloc(ptr, layout)` is valid.
365        unsafe { System.dealloc(ptr, layout) };
366        if let Some(_g) = ReentryGuard::enter() {
367            self.record_dealloc(layout.size() as u64);
368        }
369    }
370
371    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
372        // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
373        // a prior allocation with `layout`, `new_size` is non-zero,
374        // and the alignment in `layout` remains valid for the new
375        // size. We forward all three to `System.realloc` which has
376        // the same contract.
377        let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
378        if !new_ptr.is_null() {
379            if let Some(_g) = ReentryGuard::enter() {
380                self.record_realloc(layout.size() as u64, new_size as u64);
381                self.register_self();
382                // Per dhat semantics: realloc records as one event
383                // attributed to `new_size` (including shrinks).
384                #[cfg(feature = "backtraces")]
385                backtrace::record_event(new_size as u64);
386            }
387        }
388        new_ptr
389    }
390}
391
/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`]. The
/// [`Default`] value has every counter set to zero.
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// ```
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct AllocStats {
    /// Number of allocations performed.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak resident bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}
421
422/// Scoped profiler that captures a delta between start and stop.
423///
424/// Read the snapshot of the installed [`ModAlloc`] on construction
425/// and again on [`Profiler::stop`], returning the difference. If no
426/// `ModAlloc` is installed as `#[global_allocator]` and no
427/// allocation has occurred through it yet, both snapshots are
428/// zero and the delta is zero.
429///
430/// # Example
431///
432/// ```no_run
433/// use mod_alloc::{ModAlloc, Profiler};
434///
435/// #[global_allocator]
436/// static GLOBAL: ModAlloc = ModAlloc::new();
437///
438/// fn main() {
439///     let p = Profiler::start();
440///     let v: Vec<u8> = vec![0; 1024];
441///     drop(v);
442///     let stats = p.stop();
443///     println!("Captured {} alloc events", stats.alloc_count);
444/// }
445/// ```
446pub struct Profiler {
447    baseline: AllocStats,
448}
449
450impl Profiler {
451    /// Begin profiling, capturing the current allocation state.
452    ///
453    /// If no `ModAlloc` is installed as `#[global_allocator]` or no
454    /// allocation has occurred yet, the captured baseline is all
455    /// zeros.
456    ///
457    /// # Example
458    ///
459    /// ```
460    /// use mod_alloc::Profiler;
461    ///
462    /// let p = Profiler::start();
463    /// let _delta = p.stop();
464    /// ```
465    pub fn start() -> Self {
466        Self {
467            baseline: current_snapshot_or_zeros(),
468        }
469    }
470
471    /// Stop profiling and return the delta from start.
472    ///
473    /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
474    /// from `start()` to `stop()`. `peak_bytes` is the absolute
475    /// high-water mark observed during the profiling window (peak
476    /// has no meaningful delta semantic).
477    ///
478    /// # Example
479    ///
480    /// ```
481    /// use mod_alloc::Profiler;
482    ///
483    /// let p = Profiler::start();
484    /// let stats = p.stop();
485    /// assert_eq!(stats.alloc_count, 0);
486    /// ```
487    pub fn stop(self) -> AllocStats {
488        let now = current_snapshot_or_zeros();
489        AllocStats {
490            alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
491            total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
492            current_bytes: now
493                .current_bytes
494                .saturating_sub(self.baseline.current_bytes),
495            peak_bytes: now.peak_bytes,
496        }
497    }
498}
499
500fn current_snapshot_or_zeros() -> AllocStats {
501    let p = GLOBAL_HANDLE.load(Ordering::Acquire);
502    if p.is_null() {
503        AllocStats {
504            alloc_count: 0,
505            total_bytes: 0,
506            peak_bytes: 0,
507            current_bytes: 0,
508        }
509    } else {
510        // SAFETY: `GLOBAL_HANDLE` is only ever set by
511        // `ModAlloc::register_self` to point at the address of a
512        // `#[global_allocator] static` (or any other `'static`
513        // `ModAlloc`). That target has `'static` lifetime, so the
514        // pointer remains valid for the remainder of the program.
515        // We produce only a shared borrow used to call `&self`
516        // methods that read atomic counters; no mutation through
517        // the pointer occurs here.
518        unsafe { (*p).snapshot() }
519    }
520}
521
#[cfg(test)]
mod tests {
    use super::*;

    // Spells out a full set of expected counter values so a test can
    // compare an entire snapshot in one assertion.
    fn expected(
        alloc_count: u64,
        total_bytes: u64,
        current_bytes: u64,
        peak_bytes: u64,
    ) -> AllocStats {
        AllocStats {
            alloc_count,
            total_bytes,
            peak_bytes,
            current_bytes,
        }
    }

    // Construction alone must succeed.
    #[test]
    fn allocator_constructs() {
        let _allocator = ModAlloc::new();
    }

    // A fresh allocator reports zero for every counter.
    #[test]
    fn snapshot_returns_zeros_initially() {
        let seen = ModAlloc::new().snapshot();
        assert_eq!(seen, expected(0, 0, 0, 0));
    }

    // Resetting an untouched allocator leaves the event count at zero.
    #[test]
    fn reset_works() {
        let tracker = ModAlloc::new();
        tracker.reset();
        assert_eq!(tracker.snapshot().alloc_count, 0);
    }

    // Two allocations accumulate into every counter.
    #[test]
    fn record_alloc_updates_counters() {
        let tracker = ModAlloc::new();
        for size in [128, 256] {
            tracker.record_alloc(size);
        }
        assert_eq!(tracker.snapshot(), expected(2, 384, 384, 384));
    }

    // A deallocation lowers only the live total.
    #[test]
    fn record_dealloc_decreases_current_only() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(1000);
        tracker.record_dealloc(400);
        assert_eq!(tracker.snapshot(), expected(1, 1000, 600, 1000));
    }

    // A growing realloc adds its delta to the total and lifts the peak.
    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(100);
        tracker.record_realloc(100, 250);
        assert_eq!(tracker.snapshot(), expected(2, 250, 250, 250));
    }

    // A shrinking realloc is counted as an event but only lowers the
    // live total.
    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(500);
        tracker.record_realloc(500, 200);
        assert_eq!(tracker.snapshot(), expected(2, 500, 200, 500));
    }

    // The peak stays at its maximum after the live total drops.
    #[test]
    fn peak_holds_high_water_mark() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(1000);
        tracker.record_dealloc(1000);
        tracker.record_alloc(500);
        let AllocStats {
            peak_bytes,
            current_bytes,
            ..
        } = tracker.snapshot();
        assert_eq!(peak_bytes, 1000);
        assert_eq!(current_bytes, 500);
    }

    // While one guard is alive a second entry is refused; dropping the
    // guard makes entry possible again.
    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let first = ReentryGuard::enter();
        assert!(first.is_some());
        assert!(
            ReentryGuard::enter().is_none(),
            "nested entry must be denied"
        );
        drop(first);
        assert!(
            ReentryGuard::enter().is_some(),
            "entry must be allowed after outer drops"
        );
    }

    // Starting and immediately stopping a profiler reports no events.
    #[test]
    fn profiler_start_stop_with_no_handle() {
        let delta = Profiler::start().stop();
        assert_eq!(delta.alloc_count, 0);
    }
}