Skip to main content

mod_alloc/
lib.rs

1//! # mod-alloc
2//!
3//! Allocation profiling for Rust. Tracks allocation counts, total
4//! bytes, peak resident memory, and current resident memory by
5//! wrapping the system allocator via [`GlobalAlloc`].
6//!
7//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
8//! zero external dependencies on the hot path.
9//!
10//! ## Installing as the global allocator
11//!
12//! ```no_run
13//! use mod_alloc::{ModAlloc, Profiler};
14//!
15//! #[global_allocator]
16//! static GLOBAL: ModAlloc = ModAlloc::new();
17//!
18//! fn main() {
19//!     let p = Profiler::start();
20//!
21//!     let v: Vec<u64> = (0..1000).collect();
22//!     drop(v);
23//!
24//!     let stats = p.stop();
25//!     println!("Allocations: {}", stats.alloc_count);
26//!     println!("Total bytes: {}", stats.total_bytes);
27//!     println!("Peak bytes (absolute): {}", stats.peak_bytes);
28//! }
29//! ```
30//!
31//! ## Counter semantics
32//!
33//! The four Tier 1 counters track allocator activity since the
34//! installed [`ModAlloc`] began counting (or since the last
35//! [`ModAlloc::reset`] call):
36//!
37//! | Counter         | Updated on `alloc`            | Updated on `dealloc` |
38//! |-----------------|-------------------------------|----------------------|
39//! | `alloc_count`   | `+= 1`                        | (unchanged)          |
40//! | `total_bytes`   | `+= size`                     | (unchanged)          |
41//! | `current_bytes` | `+= size`                     | `-= size`            |
42//! | `peak_bytes`    | high-water mark of `current`  | (unchanged)          |
43//!
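//! A successful `realloc` is counted as one allocation event
//! (`alloc_count += 1`) whether it grows or shrinks the block. A
//! growing realloc adds the growth delta to `total_bytes` and
//! `current_bytes`; a shrinking realloc leaves `total_bytes`
//! unchanged and subtracts the shrink delta from `current_bytes`.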
47//!
48//! ## Status
49//!
50//! v0.9.0 ships Tier 1 (counters) only. The `backtraces` and
51//! `dhat-compat` cargo features are defined for forward
52//! compatibility but compile as no-ops; Tier 2 (inline backtrace
53//! capture) lands in v0.9.1 and Tier 3 (DHAT-compatible JSON
54//! output) lands in v0.9.3.
55
56#![cfg_attr(docsrs, feature(doc_cfg))]
57#![warn(missing_docs)]
58#![warn(rust_2018_idioms)]
59
60use std::alloc::{GlobalAlloc, Layout, System};
61use std::cell::Cell;
62use std::ptr;
63use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};
64
65// Process-wide handle to the installed `ModAlloc`. Populated lazily
66// on the first non-reentrant alloc call. `Profiler` reads from this
67// to locate the canonical counters without requiring an explicit
68// registration call from the user.
69static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());
70
71thread_local! {
72    // Reentrancy flag. Set while inside the tracking path; if any
73    // allocation occurs while set, the recursive call bypasses
74    // tracking and forwards directly to the System allocator.
75    //
76    // `const` initialization (stable since 1.59) avoids any lazy
77    // construction allocation inside the TLS access path.
78    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
79}
80
81// RAII guard for the reentrancy flag. `enter` returns `None` if the
82// current thread is already inside a tracked allocation (caller
83// must skip counter updates) or if TLS is unavailable (e.g. during
84// thread teardown). The guard clears the flag on drop.
85struct ReentryGuard;
86
87impl ReentryGuard {
88    fn enter() -> Option<Self> {
89        IN_ALLOC
90            .try_with(|flag| {
91                if flag.get() {
92                    None
93                } else {
94                    flag.set(true);
95                    Some(ReentryGuard)
96                }
97            })
98            .ok()
99            .flatten()
100    }
101}
102
103impl Drop for ReentryGuard {
104    fn drop(&mut self) {
105        let _ = IN_ALLOC.try_with(|flag| flag.set(false));
106    }
107}
108
109/// Global allocator wrapper that tracks allocations.
110///
111/// Install as `#[global_allocator]` to enable tracking. The wrapper
112/// forwards every allocation, deallocation, reallocation, and
113/// zero-initialised allocation to [`std::alloc::System`] and records
114/// the event in four lock-free [`AtomicU64`] counters.
115///
116/// # Example
117///
118/// ```no_run
119/// use mod_alloc::ModAlloc;
120///
121/// #[global_allocator]
122/// static GLOBAL: ModAlloc = ModAlloc::new();
123///
124/// fn main() {
125///     let v: Vec<u8> = vec![0; 1024];
126///     let stats = GLOBAL.snapshot();
127///     assert!(stats.alloc_count >= 1);
128///     drop(v);
129/// }
130/// ```
131pub struct ModAlloc {
132    alloc_count: AtomicU64,
133    total_bytes: AtomicU64,
134    peak_bytes: AtomicU64,
135    current_bytes: AtomicU64,
136}
137
138impl ModAlloc {
139    /// Construct a new `ModAlloc` allocator wrapper.
140    ///
141    /// All counters start at zero. This function is `const`, which
142    /// allows construction in a `static` for use as
143    /// `#[global_allocator]`.
144    ///
145    /// # Example
146    ///
147    /// ```
148    /// use mod_alloc::ModAlloc;
149    ///
150    /// static GLOBAL: ModAlloc = ModAlloc::new();
151    /// let stats = GLOBAL.snapshot();
152    /// assert_eq!(stats.alloc_count, 0);
153    /// ```
154    pub const fn new() -> Self {
155        Self {
156            alloc_count: AtomicU64::new(0),
157            total_bytes: AtomicU64::new(0),
158            peak_bytes: AtomicU64::new(0),
159            current_bytes: AtomicU64::new(0),
160        }
161    }
162
163    /// Snapshot the current counter values.
164    ///
165    /// Each counter is read independently with `Relaxed` ordering;
166    /// the resulting [`AllocStats`] is a coherent best-effort view
167    /// but does not represent a single atomic moment in time. For
168    /// scoped measurement, prefer [`Profiler`].
169    ///
170    /// # Example
171    ///
172    /// ```
173    /// use mod_alloc::ModAlloc;
174    ///
175    /// let alloc = ModAlloc::new();
176    /// let stats = alloc.snapshot();
177    /// assert_eq!(stats.alloc_count, 0);
178    /// ```
179    pub fn snapshot(&self) -> AllocStats {
180        AllocStats {
181            alloc_count: self.alloc_count.load(Ordering::Relaxed),
182            total_bytes: self.total_bytes.load(Ordering::Relaxed),
183            peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
184            current_bytes: self.current_bytes.load(Ordering::Relaxed),
185        }
186    }
187
188    /// Reset all counters to zero.
189    ///
190    /// Intended for use at the start of a profile run, before any
191    /// outstanding allocations exist. Calling `reset` while
192    /// allocations are live can cause `current_bytes` to wrap on
193    /// subsequent deallocations; the other counters are unaffected.
194    ///
195    /// # Example
196    ///
197    /// ```
198    /// use mod_alloc::ModAlloc;
199    ///
200    /// let alloc = ModAlloc::new();
201    /// alloc.reset();
202    /// let stats = alloc.snapshot();
203    /// assert_eq!(stats.alloc_count, 0);
204    /// ```
205    pub fn reset(&self) {
206        self.alloc_count.store(0, Ordering::Relaxed);
207        self.total_bytes.store(0, Ordering::Relaxed);
208        self.peak_bytes.store(0, Ordering::Relaxed);
209        self.current_bytes.store(0, Ordering::Relaxed);
210    }
211
212    #[inline]
213    fn record_alloc(&self, size: u64) {
214        self.alloc_count.fetch_add(1, Ordering::Relaxed);
215        self.total_bytes.fetch_add(size, Ordering::Relaxed);
216        let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
217        self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
218    }
219
220    #[inline]
221    fn record_dealloc(&self, size: u64) {
222        self.current_bytes.fetch_sub(size, Ordering::Relaxed);
223    }
224
225    #[inline]
226    fn record_realloc(&self, old_size: u64, new_size: u64) {
227        self.alloc_count.fetch_add(1, Ordering::Relaxed);
228        if new_size > old_size {
229            let delta = new_size - old_size;
230            self.total_bytes.fetch_add(delta, Ordering::Relaxed);
231            let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
232            self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
233        } else if new_size < old_size {
234            self.current_bytes
235                .fetch_sub(old_size - new_size, Ordering::Relaxed);
236        }
237    }
238
239    #[inline]
240    fn register_self(&self) {
241        if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
242            let _ = GLOBAL_HANDLE.compare_exchange(
243                ptr::null_mut(),
244                self as *const ModAlloc as *mut ModAlloc,
245                Ordering::Release,
246                Ordering::Relaxed,
247            );
248        }
249    }
250}
251
252impl Default for ModAlloc {
253    fn default() -> Self {
254        Self::new()
255    }
256}
257
258// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
259// underlying allocation through [`std::alloc::System`]. Each method
260// forwards its arguments unchanged to `System` and only inspects
261// the result; size/alignment invariants required by the
262// `GlobalAlloc` contract are passed through unmodified, so the
263// caller's contract to us becomes our contract to System.
264//
265// The counter-update path uses thread-local reentrancy detection
266// (see `ReentryGuard`) so that any allocation triggered transitively
267// inside the tracking path bypasses tracking and forwards directly
268// to System, preserving the "hook MUST NOT itself allocate"
269// invariant from REPS section 4.
270unsafe impl GlobalAlloc for ModAlloc {
271    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
272        // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
273        // size; we forward unchanged to `System.alloc`, which has
274        // the same contract.
275        let ptr = unsafe { System.alloc(layout) };
276        if !ptr.is_null() {
277            if let Some(_g) = ReentryGuard::enter() {
278                self.record_alloc(layout.size() as u64);
279                self.register_self();
280            }
281        }
282        ptr
283    }
284
285    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
286        // SAFETY: same invariants as `alloc`; `layout` forwarded
287        // unchanged. `System.alloc_zeroed` zero-fills the returned
288        // memory, satisfying the `GlobalAlloc::alloc_zeroed`
289        // contract.
290        let ptr = unsafe { System.alloc_zeroed(layout) };
291        if !ptr.is_null() {
292            if let Some(_g) = ReentryGuard::enter() {
293                self.record_alloc(layout.size() as u64);
294                self.register_self();
295            }
296        }
297        ptr
298    }
299
300    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
301        // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
302        // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
303        // allocator with the given `layout`; we forwarded all of
304        // those to `System` with the same `layout`, so the inverse
305        // pairing for `System.dealloc(ptr, layout)` is valid.
306        unsafe { System.dealloc(ptr, layout) };
307        if let Some(_g) = ReentryGuard::enter() {
308            self.record_dealloc(layout.size() as u64);
309        }
310    }
311
312    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
313        // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
314        // a prior allocation with `layout`, `new_size` is non-zero,
315        // and the alignment in `layout` remains valid for the new
316        // size. We forward all three to `System.realloc` which has
317        // the same contract.
318        let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
319        if !new_ptr.is_null() {
320            if let Some(_g) = ReentryGuard::enter() {
321                self.record_realloc(layout.size() as u64, new_size as u64);
322                self.register_self();
323            }
324        }
325        new_ptr
326    }
327}
328
/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`]. In a
/// value returned by [`Profiler::stop`], every field except
/// `peak_bytes` is a delta over the profiling window rather than an
/// absolute counter value.
///
/// `AllocStats::default()` is the all-zero value.
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// assert_eq!(AllocStats::default().alloc_count, 0);
/// ```
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct AllocStats {
    /// Number of allocation events recorded. Every tracked
    /// allocation, zeroed allocation, and reallocation counts as
    /// one event.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak resident bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}
358
359/// Scoped profiler that captures a delta between start and stop.
360///
361/// Read the snapshot of the installed [`ModAlloc`] on construction
362/// and again on [`Profiler::stop`], returning the difference. If no
363/// `ModAlloc` is installed as `#[global_allocator]` and no
364/// allocation has occurred through it yet, both snapshots are
365/// zero and the delta is zero.
366///
367/// # Example
368///
369/// ```no_run
370/// use mod_alloc::{ModAlloc, Profiler};
371///
372/// #[global_allocator]
373/// static GLOBAL: ModAlloc = ModAlloc::new();
374///
375/// fn main() {
376///     let p = Profiler::start();
377///     let v: Vec<u8> = vec![0; 1024];
378///     drop(v);
379///     let stats = p.stop();
380///     println!("Captured {} alloc events", stats.alloc_count);
381/// }
382/// ```
383pub struct Profiler {
384    baseline: AllocStats,
385}
386
387impl Profiler {
388    /// Begin profiling, capturing the current allocation state.
389    ///
390    /// If no `ModAlloc` is installed as `#[global_allocator]` or no
391    /// allocation has occurred yet, the captured baseline is all
392    /// zeros.
393    ///
394    /// # Example
395    ///
396    /// ```
397    /// use mod_alloc::Profiler;
398    ///
399    /// let p = Profiler::start();
400    /// let _delta = p.stop();
401    /// ```
402    pub fn start() -> Self {
403        Self {
404            baseline: current_snapshot_or_zeros(),
405        }
406    }
407
408    /// Stop profiling and return the delta from start.
409    ///
410    /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
411    /// from `start()` to `stop()`. `peak_bytes` is the absolute
412    /// high-water mark observed during the profiling window (peak
413    /// has no meaningful delta semantic).
414    ///
415    /// # Example
416    ///
417    /// ```
418    /// use mod_alloc::Profiler;
419    ///
420    /// let p = Profiler::start();
421    /// let stats = p.stop();
422    /// assert_eq!(stats.alloc_count, 0);
423    /// ```
424    pub fn stop(self) -> AllocStats {
425        let now = current_snapshot_or_zeros();
426        AllocStats {
427            alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
428            total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
429            current_bytes: now
430                .current_bytes
431                .saturating_sub(self.baseline.current_bytes),
432            peak_bytes: now.peak_bytes,
433        }
434    }
435}
436
437fn current_snapshot_or_zeros() -> AllocStats {
438    let p = GLOBAL_HANDLE.load(Ordering::Acquire);
439    if p.is_null() {
440        AllocStats {
441            alloc_count: 0,
442            total_bytes: 0,
443            peak_bytes: 0,
444            current_bytes: 0,
445        }
446    } else {
447        // SAFETY: `GLOBAL_HANDLE` is only ever set by
448        // `ModAlloc::register_self` to point at the address of a
449        // `#[global_allocator] static` (or any other `'static`
450        // `ModAlloc`). That target has `'static` lifetime, so the
451        // pointer remains valid for the remainder of the program.
452        // We produce only a shared borrow used to call `&self`
453        // methods that read atomic counters; no mutation through
454        // the pointer occurs here.
455        unsafe { (*p).snapshot() }
456    }
457}
458
// Unit tests for the counter bookkeeping, the reentrancy guard, and
// the profiler's no-allocator fallback. They drive the private
// `record_*` helpers directly, so no global allocator needs to be
// installed.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocator_constructs() {
        let _ = ModAlloc::new();
    }

    #[test]
    fn default_matches_new() {
        assert_eq!(ModAlloc::default().snapshot(), ModAlloc::new().snapshot());
    }

    #[test]
    fn snapshot_returns_zeros_initially() {
        let a = ModAlloc::new();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.peak_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }

    #[test]
    fn reset_works() {
        let a = ModAlloc::new();
        // Dirty every counter first; resetting an untouched allocator
        // would pass even if `reset` did nothing.
        a.record_alloc(64);
        a.record_realloc(64, 96);
        assert_ne!(a.snapshot().alloc_count, 0);

        a.reset();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.peak_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }

    #[test]
    fn record_alloc_updates_counters() {
        let a = ModAlloc::new();
        a.record_alloc(128);
        a.record_alloc(256);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 384);
        assert_eq!(s.current_bytes, 384);
        assert_eq!(s.peak_bytes, 384);
    }

    #[test]
    fn record_dealloc_decreases_current_only() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(400);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 1);
        assert_eq!(s.total_bytes, 1000);
        assert_eq!(s.current_bytes, 600);
        assert_eq!(s.peak_bytes, 1000);
    }

    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.record_realloc(100, 250);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 250);
        assert_eq!(s.current_bytes, 250);
        assert_eq!(s.peak_bytes, 250);
    }

    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let a = ModAlloc::new();
        a.record_alloc(500);
        a.record_realloc(500, 200);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 500);
        assert_eq!(s.current_bytes, 200);
        assert_eq!(s.peak_bytes, 500);
    }

    #[test]
    fn peak_holds_high_water_mark() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(1000);
        a.record_alloc(500);
        let s = a.snapshot();
        assert_eq!(s.peak_bytes, 1000);
        assert_eq!(s.current_bytes, 500);
    }

    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let outer = ReentryGuard::enter();
        assert!(outer.is_some());
        let inner = ReentryGuard::enter();
        assert!(inner.is_none(), "nested entry must be denied");
        drop(outer);
        let after = ReentryGuard::enter();
        assert!(after.is_some(), "entry must be allowed after outer drops");
    }

    #[test]
    fn profiler_start_stop_with_no_handle() {
        // No test registers an allocator, so both snapshots are the
        // all-zero fallback.
        let p = Profiler::start();
        let s = p.stop();
        assert_eq!(s.alloc_count, 0);
    }
}