// mod_alloc/lib.rs
//! # mod-alloc
//!
//! Allocation profiling for Rust. Tracks allocation counts, total
//! bytes, peak resident memory, and current resident memory by
//! wrapping the system allocator via [`GlobalAlloc`].
//!
//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
//! zero external dependencies on the hot path.
//!
//! ## Installing as the global allocator
//!
//! ```no_run
//! use mod_alloc::{ModAlloc, Profiler};
//!
//! #[global_allocator]
//! static GLOBAL: ModAlloc = ModAlloc::new();
//!
//! fn main() {
//!     let p = Profiler::start();
//!
//!     let v: Vec<u64> = (0..1000).collect();
//!     drop(v);
//!
//!     let stats = p.stop();
//!     println!("Allocations: {}", stats.alloc_count);
//!     println!("Total bytes: {}", stats.total_bytes);
//!     println!("Peak bytes (absolute): {}", stats.peak_bytes);
//! }
//! ```
//!
//! ## Counter semantics
//!
//! The four Tier 1 counters track allocator activity since the
//! installed [`ModAlloc`] began counting (or since the last
//! [`ModAlloc::reset`] call):
//!
//! | Counter         | Updated on `alloc`            | Updated on `dealloc` |
//! |-----------------|-------------------------------|----------------------|
//! | `alloc_count`   | `+= 1`                        | (unchanged)          |
//! | `total_bytes`   | `+= size`                     | (unchanged)          |
//! | `current_bytes` | `+= size`                     | `-= size`            |
//! | `peak_bytes`    | high-water mark of `current`  | (unchanged)          |
//!
//! `realloc` is counted as one allocation event. `total_bytes`
//! increases by the growth delta on a growing realloc and is
//! unchanged on a shrinking realloc.
//!
//! ## Status
//!
//! v0.9.1 adds Tier 2 (inline backtrace capture) behind the
//! `backtraces` feature. Default builds still ship Tier 1
//! counters only. Tier 3 (DHAT-compatible JSON output) lands in
//! v0.9.3.
//!
//! ## Backtraces (`backtraces` feature)
//!
//! With `mod-alloc = { version = "0.9", features = ["backtraces"] }`
//! and `RUSTFLAGS="-C force-frame-pointers=yes"`, each tracked
//! allocation captures up to 8 frames of its call site via inline
//! frame-pointer walking on `x86_64` and `aarch64`. Per-call-site
//! aggregation is exposed via `ModAlloc::call_sites` (available
//! only with the `backtraces` feature); the result is raw return
//! addresses. Symbolication ships in v0.9.2.
//!
//! Aggregation-table size is controlled by the `MOD_ALLOC_BUCKETS`
//! environment variable at process start (default 4,096 buckets,
//! ~384 KB).
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_docs)]
#![warn(rust_2018_idioms)]

use std::alloc::{GlobalAlloc, Layout, System};
use std::cell::Cell;
use std::ptr;
use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};

#[cfg(feature = "backtraces")]
mod backtrace;

#[cfg(feature = "backtraces")]
pub use backtrace::CallSiteStats;

#[cfg(feature = "symbolicate")]
mod symbolicate;

#[cfg(feature = "symbolicate")]
pub use symbolicate::{SymbolicatedCallSite, SymbolicatedFrame};
// Process-wide handle to the installed `ModAlloc`. Populated lazily
// on the first non-reentrant alloc call. `Profiler` reads from this
// to locate the canonical counters without requiring an explicit
// registration call from the user.
//
// The pointer is only ever written by `ModAlloc::register_self`,
// which CASes it from null exactly once; it is never cleared or
// replaced afterwards, which is what makes the dereference in
// `current_snapshot_or_zeros` sound for the life of the process.
static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());
thread_local! {
    // Reentrancy flag. Set while inside the tracking path; if any
    // allocation occurs while set, the recursive call bypasses
    // tracking and forwards directly to the System allocator.
    //
    // Accessed only through `ReentryGuard`, which pairs the set
    // with a guaranteed clear on drop.
    //
    // `const` initialization (stable since 1.59) avoids any lazy
    // construction allocation inside the TLS access path.
    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
}
// RAII guard for the reentrancy flag. `enter` returns `None` if the
// current thread is already inside a tracked allocation (caller
// must skip counter updates) or if TLS is unavailable (e.g. during
// thread teardown). The guard clears the flag on drop.
//
// Zero-sized: holding one carries no data, only the obligation to
// reset `IN_ALLOC` when it goes out of scope.
struct ReentryGuard;
112impl ReentryGuard {
113    fn enter() -> Option<Self> {
114        IN_ALLOC
115            .try_with(|flag| {
116                if flag.get() {
117                    None
118                } else {
119                    flag.set(true);
120                    Some(ReentryGuard)
121                }
122            })
123            .ok()
124            .flatten()
125    }
126}
127
128impl Drop for ReentryGuard {
129    fn drop(&mut self) {
130        let _ = IN_ALLOC.try_with(|flag| flag.set(false));
131    }
132}
133
/// Global allocator wrapper that tracks allocations.
///
/// Install as `#[global_allocator]` to enable tracking. The wrapper
/// forwards every allocation, deallocation, reallocation, and
/// zero-initialised allocation to [`std::alloc::System`] and records
/// the event in four lock-free [`AtomicU64`] counters.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::ModAlloc;
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let v: Vec<u8> = vec![0; 1024];
///     let stats = GLOBAL.snapshot();
///     assert!(stats.alloc_count >= 1);
///     drop(v);
/// }
/// ```
pub struct ModAlloc {
    // Number of alloc/alloc_zeroed/realloc events recorded.
    alloc_count: AtomicU64,
    // Cumulative bytes allocated (realloc contributes its growth delta).
    total_bytes: AtomicU64,
    // High-water mark of `current_bytes`; monotone non-decreasing.
    peak_bytes: AtomicU64,
    // Live bytes: incremented on alloc, decremented on dealloc.
    current_bytes: AtomicU64,
}
163impl ModAlloc {
164    /// Construct a new `ModAlloc` allocator wrapper.
165    ///
166    /// All counters start at zero. This function is `const`, which
167    /// allows construction in a `static` for use as
168    /// `#[global_allocator]`.
169    ///
170    /// # Example
171    ///
172    /// ```
173    /// use mod_alloc::ModAlloc;
174    ///
175    /// static GLOBAL: ModAlloc = ModAlloc::new();
176    /// let stats = GLOBAL.snapshot();
177    /// assert_eq!(stats.alloc_count, 0);
178    /// ```
179    pub const fn new() -> Self {
180        Self {
181            alloc_count: AtomicU64::new(0),
182            total_bytes: AtomicU64::new(0),
183            peak_bytes: AtomicU64::new(0),
184            current_bytes: AtomicU64::new(0),
185        }
186    }
187
188    /// Snapshot the current counter values.
189    ///
190    /// Each counter is read independently with `Relaxed` ordering;
191    /// the resulting [`AllocStats`] is a coherent best-effort view
192    /// but does not represent a single atomic moment in time. For
193    /// scoped measurement, prefer [`Profiler`].
194    ///
195    /// # Example
196    ///
197    /// ```
198    /// use mod_alloc::ModAlloc;
199    ///
200    /// let alloc = ModAlloc::new();
201    /// let stats = alloc.snapshot();
202    /// assert_eq!(stats.alloc_count, 0);
203    /// ```
204    pub fn snapshot(&self) -> AllocStats {
205        AllocStats {
206            alloc_count: self.alloc_count.load(Ordering::Relaxed),
207            total_bytes: self.total_bytes.load(Ordering::Relaxed),
208            peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
209            current_bytes: self.current_bytes.load(Ordering::Relaxed),
210        }
211    }
212
213    /// Reset all counters to zero.
214    ///
215    /// Intended for use at the start of a profile run, before any
216    /// outstanding allocations exist. Calling `reset` while
217    /// allocations are live can cause `current_bytes` to wrap on
218    /// subsequent deallocations; the other counters are unaffected.
219    ///
220    /// # Example
221    ///
222    /// ```
223    /// use mod_alloc::ModAlloc;
224    ///
225    /// let alloc = ModAlloc::new();
226    /// alloc.reset();
227    /// let stats = alloc.snapshot();
228    /// assert_eq!(stats.alloc_count, 0);
229    /// ```
230    pub fn reset(&self) {
231        self.alloc_count.store(0, Ordering::Relaxed);
232        self.total_bytes.store(0, Ordering::Relaxed);
233        self.peak_bytes.store(0, Ordering::Relaxed);
234        self.current_bytes.store(0, Ordering::Relaxed);
235    }
236
237    #[inline]
238    fn record_alloc(&self, size: u64) {
239        self.alloc_count.fetch_add(1, Ordering::Relaxed);
240        self.total_bytes.fetch_add(size, Ordering::Relaxed);
241        let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
242        self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
243    }
244
245    #[inline]
246    fn record_dealloc(&self, size: u64) {
247        self.current_bytes.fetch_sub(size, Ordering::Relaxed);
248    }
249
250    #[inline]
251    fn record_realloc(&self, old_size: u64, new_size: u64) {
252        self.alloc_count.fetch_add(1, Ordering::Relaxed);
253        if new_size > old_size {
254            let delta = new_size - old_size;
255            self.total_bytes.fetch_add(delta, Ordering::Relaxed);
256            let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
257            self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
258        } else if new_size < old_size {
259            self.current_bytes
260                .fetch_sub(old_size - new_size, Ordering::Relaxed);
261        }
262    }
263
264    #[inline]
265    fn register_self(&self) {
266        if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
267            let _ = GLOBAL_HANDLE.compare_exchange(
268                ptr::null_mut(),
269                self as *const ModAlloc as *mut ModAlloc,
270                Ordering::Release,
271                Ordering::Relaxed,
272            );
273        }
274    }
275
276    /// Drain the per-call-site aggregation table into a `Vec`.
277    ///
278    /// Available only with the `backtraces` cargo feature. The
279    /// returned vector contains one [`CallSiteStats`] per unique
280    /// call site observed since the table was first written. Each
281    /// row carries up to 8 raw return addresses (top of stack
282    /// first), the number of allocations attributed to that site,
283    /// and the total bytes.
284    ///
285    /// Symbolication (resolving addresses to function names)
286    /// lands in `v0.9.2`. This method exposes raw addresses only.
287    ///
288    /// # Example
289    ///
290    /// ```no_run
291    /// # #[cfg(feature = "backtraces")]
292    /// # fn demo() {
293    /// use mod_alloc::ModAlloc;
294    ///
295    /// #[global_allocator]
296    /// static GLOBAL: ModAlloc = ModAlloc::new();
297    ///
298    /// let _v: Vec<u8> = vec![0; 1024];
299    /// for site in GLOBAL.call_sites() {
300    ///     println!("{} allocs, {} bytes at {:#x}",
301    ///         site.count, site.total_bytes, site.frames[0]);
302    /// }
303    /// # }
304    /// ```
305    #[cfg(feature = "backtraces")]
306    pub fn call_sites(&self) -> Vec<CallSiteStats> {
307        backtrace::call_sites_report()
308    }
309
310    /// Drain the per-call-site table and symbolicate each frame
311    /// against the running binary's own debug info.
312    ///
313    /// Available only with the `symbolicate` cargo feature, which
314    /// also implies `backtraces`. Returns one
315    /// [`SymbolicatedCallSite`] per unique call site, each
316    /// carrying resolved function names plus (where available)
317    /// source file and line.
318    ///
319    /// Allocates. Safe to call from non-allocator contexts only
320    /// (ordinary user code outside the global-allocator hook).
321    ///
322    /// Results are cached per-address across calls.
323    ///
324    /// # Example
325    ///
326    /// ```no_run
327    /// # #[cfg(feature = "symbolicate")]
328    /// # fn demo() {
329    /// use mod_alloc::ModAlloc;
330    ///
331    /// #[global_allocator]
332    /// static GLOBAL: ModAlloc = ModAlloc::new();
333    ///
334    /// let _v: Vec<u8> = vec![0; 1024];
335    /// for site in GLOBAL.symbolicated_report() {
336    ///     let top = &site.frames[0];
337    ///     println!("{} allocs / {} bytes at {}",
338    ///         site.count,
339    ///         site.total_bytes,
340    ///         top.function.as_deref().unwrap_or("<unresolved>"));
341    /// }
342    /// # }
343    /// ```
344    #[cfg(feature = "symbolicate")]
345    pub fn symbolicated_report(&self) -> Vec<SymbolicatedCallSite> {
346        symbolicate::symbolicated_report()
347    }
348}
349
350impl Default for ModAlloc {
351    fn default() -> Self {
352        Self::new()
353    }
354}
355
356// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
357// underlying allocation through [`std::alloc::System`]. Each method
358// forwards its arguments unchanged to `System` and only inspects
359// the result; size/alignment invariants required by the
360// `GlobalAlloc` contract are passed through unmodified, so the
361// caller's contract to us becomes our contract to System.
362//
363// The counter-update path uses thread-local reentrancy detection
364// (see `ReentryGuard`) so that any allocation triggered transitively
365// inside the tracking path bypasses tracking and forwards directly
366// to System, preserving the "hook MUST NOT itself allocate"
367// invariant from REPS section 4.
368unsafe impl GlobalAlloc for ModAlloc {
369    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
370        // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
371        // size; we forward unchanged to `System.alloc`, which has
372        // the same contract.
373        let ptr = unsafe { System.alloc(layout) };
374        if !ptr.is_null() {
375            if let Some(_g) = ReentryGuard::enter() {
376                let size = layout.size() as u64;
377                self.record_alloc(size);
378                self.register_self();
379                #[cfg(feature = "backtraces")]
380                backtrace::record_event(size);
381            }
382        }
383        ptr
384    }
385
386    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
387        // SAFETY: same invariants as `alloc`; `layout` forwarded
388        // unchanged. `System.alloc_zeroed` zero-fills the returned
389        // memory, satisfying the `GlobalAlloc::alloc_zeroed`
390        // contract.
391        let ptr = unsafe { System.alloc_zeroed(layout) };
392        if !ptr.is_null() {
393            if let Some(_g) = ReentryGuard::enter() {
394                let size = layout.size() as u64;
395                self.record_alloc(size);
396                self.register_self();
397                #[cfg(feature = "backtraces")]
398                backtrace::record_event(size);
399            }
400        }
401        ptr
402    }
403
404    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
405        // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
406        // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
407        // allocator with the given `layout`; we forwarded all of
408        // those to `System` with the same `layout`, so the inverse
409        // pairing for `System.dealloc(ptr, layout)` is valid.
410        unsafe { System.dealloc(ptr, layout) };
411        if let Some(_g) = ReentryGuard::enter() {
412            self.record_dealloc(layout.size() as u64);
413        }
414    }
415
416    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
417        // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
418        // a prior allocation with `layout`, `new_size` is non-zero,
419        // and the alignment in `layout` remains valid for the new
420        // size. We forward all three to `System.realloc` which has
421        // the same contract.
422        let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
423        if !new_ptr.is_null() {
424            if let Some(_g) = ReentryGuard::enter() {
425                self.record_realloc(layout.size() as u64, new_size as u64);
426                self.register_self();
427                // Per dhat semantics: realloc records as one event
428                // attributed to `new_size` (including shrinks).
429                #[cfg(feature = "backtraces")]
430                backtrace::record_event(new_size as u64);
431            }
432        }
433        new_ptr
434    }
435}
436
/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`].
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AllocStats {
    /// Number of allocations performed.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak resident bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}
/// Scoped profiler that captures a delta between start and stop.
///
/// Read the snapshot of the installed [`ModAlloc`] on construction
/// and again on [`Profiler::stop`], returning the difference. If no
/// `ModAlloc` is installed as `#[global_allocator]` and no
/// allocation has occurred through it yet, both snapshots are
/// zero and the delta is zero.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::{ModAlloc, Profiler};
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let p = Profiler::start();
///     let v: Vec<u8> = vec![0; 1024];
///     drop(v);
///     let stats = p.stop();
///     println!("Captured {} alloc events", stats.alloc_count);
/// }
/// ```
pub struct Profiler {
    // Counter values captured at `start()`; subtracted in `stop()`.
    baseline: AllocStats,
}
495impl Profiler {
496    /// Begin profiling, capturing the current allocation state.
497    ///
498    /// If no `ModAlloc` is installed as `#[global_allocator]` or no
499    /// allocation has occurred yet, the captured baseline is all
500    /// zeros.
501    ///
502    /// # Example
503    ///
504    /// ```
505    /// use mod_alloc::Profiler;
506    ///
507    /// let p = Profiler::start();
508    /// let _delta = p.stop();
509    /// ```
510    pub fn start() -> Self {
511        Self {
512            baseline: current_snapshot_or_zeros(),
513        }
514    }
515
516    /// Stop profiling and return the delta from start.
517    ///
518    /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
519    /// from `start()` to `stop()`. `peak_bytes` is the absolute
520    /// high-water mark observed during the profiling window (peak
521    /// has no meaningful delta semantic).
522    ///
523    /// # Example
524    ///
525    /// ```
526    /// use mod_alloc::Profiler;
527    ///
528    /// let p = Profiler::start();
529    /// let stats = p.stop();
530    /// assert_eq!(stats.alloc_count, 0);
531    /// ```
532    pub fn stop(self) -> AllocStats {
533        let now = current_snapshot_or_zeros();
534        AllocStats {
535            alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
536            total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
537            current_bytes: now
538                .current_bytes
539                .saturating_sub(self.baseline.current_bytes),
540            peak_bytes: now.peak_bytes,
541        }
542    }
543}
544
545fn current_snapshot_or_zeros() -> AllocStats {
546    let p = GLOBAL_HANDLE.load(Ordering::Acquire);
547    if p.is_null() {
548        AllocStats {
549            alloc_count: 0,
550            total_bytes: 0,
551            peak_bytes: 0,
552            current_bytes: 0,
553        }
554    } else {
555        // SAFETY: `GLOBAL_HANDLE` is only ever set by
556        // `ModAlloc::register_self` to point at the address of a
557        // `#[global_allocator] static` (or any other `'static`
558        // `ModAlloc`). That target has `'static` lifetime, so the
559        // pointer remains valid for the remainder of the program.
560        // We produce only a shared borrow used to call `&self`
561        // methods that read atomic counters; no mutation through
562        // the pointer occurs here.
563        unsafe { (*p).snapshot() }
564    }
565}
566
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocator_constructs() {
        let _ = ModAlloc::new();
    }

    #[test]
    fn snapshot_returns_zeros_initially() {
        let tracker = ModAlloc::new();
        let snap = tracker.snapshot();
        assert_eq!(snap.alloc_count, 0);
        assert_eq!(snap.total_bytes, 0);
        assert_eq!(snap.peak_bytes, 0);
        assert_eq!(snap.current_bytes, 0);
    }

    #[test]
    fn reset_works() {
        let tracker = ModAlloc::new();
        tracker.reset();
        assert_eq!(tracker.snapshot().alloc_count, 0);
    }

    #[test]
    fn record_alloc_updates_counters() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(128);
        tracker.record_alloc(256);
        let snap = tracker.snapshot();
        // Two events totalling 384 bytes, all still resident.
        assert_eq!(snap.alloc_count, 2);
        assert_eq!(snap.total_bytes, 384);
        assert_eq!(snap.current_bytes, 384);
        assert_eq!(snap.peak_bytes, 384);
    }

    #[test]
    fn record_dealloc_decreases_current_only() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(1000);
        tracker.record_dealloc(400);
        let snap = tracker.snapshot();
        // Free moves only the resident count; the rest are monotone.
        assert_eq!(snap.alloc_count, 1);
        assert_eq!(snap.total_bytes, 1000);
        assert_eq!(snap.current_bytes, 600);
        assert_eq!(snap.peak_bytes, 1000);
    }

    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(100);
        tracker.record_realloc(100, 250);
        let snap = tracker.snapshot();
        // Growth delta (150) joins total; resident and peak follow.
        assert_eq!(snap.alloc_count, 2);
        assert_eq!(snap.total_bytes, 250);
        assert_eq!(snap.current_bytes, 250);
        assert_eq!(snap.peak_bytes, 250);
    }

    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(500);
        tracker.record_realloc(500, 200);
        let snap = tracker.snapshot();
        // Shrinks count as events but leave total and peak alone.
        assert_eq!(snap.alloc_count, 2);
        assert_eq!(snap.total_bytes, 500);
        assert_eq!(snap.current_bytes, 200);
        assert_eq!(snap.peak_bytes, 500);
    }

    #[test]
    fn peak_holds_high_water_mark() {
        let tracker = ModAlloc::new();
        tracker.record_alloc(1000);
        tracker.record_dealloc(1000);
        tracker.record_alloc(500);
        let snap = tracker.snapshot();
        assert_eq!(snap.peak_bytes, 1000);
        assert_eq!(snap.current_bytes, 500);
    }

    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let outer = ReentryGuard::enter();
        assert!(outer.is_some());
        assert!(
            ReentryGuard::enter().is_none(),
            "nested entry must be denied"
        );
        drop(outer);
        assert!(
            ReentryGuard::enter().is_some(),
            "entry must be allowed after outer drops"
        );
    }

    #[test]
    fn profiler_start_stop_with_no_handle() {
        let profiler = Profiler::start();
        assert_eq!(profiler.stop().alloc_count, 0);
    }
}