// mod_alloc/lib.rs
//! # mod-alloc
//!
//! Allocation profiling for Rust. Tracks allocation counts, total
//! bytes, peak resident memory, and current resident memory by
//! wrapping the system allocator via [`GlobalAlloc`].
//!
//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
//! zero external dependencies on the hot path.
//!
//! ## Installing as the global allocator
//!
//! ```no_run
//! use mod_alloc::{ModAlloc, Profiler};
//!
//! #[global_allocator]
//! static GLOBAL: ModAlloc = ModAlloc::new();
//!
//! fn main() {
//!     let p = Profiler::start();
//!
//!     let v: Vec<u64> = (0..1000).collect();
//!     drop(v);
//!
//!     let stats = p.stop();
//!     println!("Allocations: {}", stats.alloc_count);
//!     println!("Total bytes: {}", stats.total_bytes);
//!     println!("Peak bytes (absolute): {}", stats.peak_bytes);
//! }
//! ```
//!
//! ## Counter semantics
//!
//! The four Tier 1 counters track allocator activity since the
//! installed [`ModAlloc`] began counting (or since the last
//! [`ModAlloc::reset`] call):
//!
//! | Counter         | Updated on `alloc`           | Updated on `dealloc` |
//! |-----------------|------------------------------|----------------------|
//! | `alloc_count`   | `+= 1`                       | (unchanged)          |
//! | `total_bytes`   | `+= size`                    | (unchanged)          |
//! | `current_bytes` | `+= size`                    | `-= size`            |
//! | `peak_bytes`    | high-water mark of `current` | (unchanged)          |
//!
//! `realloc` is counted as one allocation event. `total_bytes`
//! increases by the growth delta on a growing realloc and is
//! unchanged on a shrinking realloc.
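//!
//! As a sketch of that accounting (assuming, for illustration, that
//! `Vec`'s `reserve_exact` and `shrink_to` take the realloc path,
//! which is a standard-library implementation detail):
//!
//! ```no_run
//! use mod_alloc::{ModAlloc, Profiler};
//!
//! #[global_allocator]
//! static GLOBAL: ModAlloc = ModAlloc::new();
//!
//! fn main() {
//!     let p = Profiler::start();
//!     let mut v: Vec<u8> = Vec::with_capacity(100); // alloc: one event, 100 bytes
//!     v.reserve_exact(250); // growing realloc: one event, total_bytes += 150
//!     v.shrink_to(100);     // shrinking realloc: one event, total_bytes unchanged
//!     let stats = p.stop();
//!     println!("events: {}, total: {}", stats.alloc_count, stats.total_bytes);
//! }
//! ```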
//!
//! ## Status
//!
//! v0.9.1 adds Tier 2 (inline backtrace capture) behind the
//! `backtraces` feature. Default builds still ship Tier 1
//! counters only. Tier 3 (DHAT-compatible JSON output) lands in
//! v0.9.3.
//!
//! ## Backtraces (`backtraces` feature)
//!
//! With `mod-alloc = { version = "0.9", features = ["backtraces"] }`
//! and `RUSTFLAGS="-C force-frame-pointers=yes"`, each tracked
//! allocation captures up to 8 frames of its call site via inline
//! frame-pointer walking on `x86_64` and `aarch64`. Per-call-site
//! aggregation is exposed via `ModAlloc::call_sites` (available
//! only with the `backtraces` feature); the result is raw return
//! addresses. Symbolication ships in v0.9.2.
//!
//! Aggregation-table size is controlled by the `MOD_ALLOC_BUCKETS`
//! environment variable at process start (default 4,096 buckets,
//! ~384 KB).
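//!
//! The default footprint is straightforward arithmetic: at roughly
//! 96 bytes per bucket (an assumed layout of 8 frames × 8 bytes,
//! plus the count, byte total, and key), 4,096 × 96 B = 384 KiB.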

#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(missing_docs)]
#![warn(rust_2018_idioms)]

#[cfg(feature = "backtraces")]
mod backtrace;

#[cfg(feature = "backtraces")]
pub use backtrace::CallSiteStats;

#[cfg(feature = "symbolicate")]
mod symbolicate;

#[cfg(feature = "symbolicate")]
pub use symbolicate::{SymbolicatedCallSite, SymbolicatedFrame};

use std::alloc::{GlobalAlloc, Layout, System};
use std::cell::Cell;
use std::ptr;
use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};

// Process-wide handle to the installed `ModAlloc`. Populated lazily
// on the first non-reentrant alloc call. `Profiler` reads from this
// to locate the canonical counters without requiring an explicit
// registration call from the user.
static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());

thread_local! {
    // Reentrancy flag. Set while inside the tracking path; if any
    // allocation occurs while set, the recursive call bypasses
    // tracking and forwards directly to the System allocator.
    //
    // `const` initialization (stable since 1.59) avoids any lazy
    // construction allocation inside the TLS access path.
    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
}

// RAII guard for the reentrancy flag. `enter` returns `None` if the
// current thread is already inside a tracked allocation (caller
// must skip counter updates) or if TLS is unavailable (e.g. during
// thread teardown). The guard clears the flag on drop.
struct ReentryGuard;

impl ReentryGuard {
    fn enter() -> Option<Self> {
        IN_ALLOC
            .try_with(|flag| {
                if flag.get() {
                    None
                } else {
                    flag.set(true);
                    Some(ReentryGuard)
                }
            })
            .ok()
            .flatten()
    }
}

impl Drop for ReentryGuard {
    fn drop(&mut self) {
        let _ = IN_ALLOC.try_with(|flag| flag.set(false));
    }
}

/// Global allocator wrapper that tracks allocations.
///
/// Install as `#[global_allocator]` to enable tracking. The wrapper
/// forwards every allocation, deallocation, reallocation, and
/// zero-initialised allocation to [`std::alloc::System`] and records
/// the event in four lock-free [`AtomicU64`] counters.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::ModAlloc;
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let v: Vec<u8> = vec![0; 1024];
///     let stats = GLOBAL.snapshot();
///     assert!(stats.alloc_count >= 1);
///     drop(v);
/// }
/// ```
pub struct ModAlloc {
    alloc_count: AtomicU64,
    total_bytes: AtomicU64,
    peak_bytes: AtomicU64,
    current_bytes: AtomicU64,
}

impl ModAlloc {
    /// Construct a new `ModAlloc` allocator wrapper.
    ///
    /// All counters start at zero. This function is `const`, which
    /// allows construction in a `static` for use as
    /// `#[global_allocator]`.
    ///
    /// # Example
    ///
    /// ```
    /// use mod_alloc::ModAlloc;
    ///
    /// static GLOBAL: ModAlloc = ModAlloc::new();
    /// let stats = GLOBAL.snapshot();
    /// assert_eq!(stats.alloc_count, 0);
    /// ```
    pub const fn new() -> Self {
        Self {
            alloc_count: AtomicU64::new(0),
            total_bytes: AtomicU64::new(0),
            peak_bytes: AtomicU64::new(0),
            current_bytes: AtomicU64::new(0),
        }
    }

    /// Snapshot the current counter values.
    ///
    /// Each counter is read independently with `Relaxed` ordering;
    /// the resulting [`AllocStats`] is a best-effort view and does
    /// not represent a single atomic moment in time. For scoped
    /// measurement, prefer [`Profiler`].
    ///
    /// # Example
    ///
    /// ```
    /// use mod_alloc::ModAlloc;
    ///
    /// let alloc = ModAlloc::new();
    /// let stats = alloc.snapshot();
    /// assert_eq!(stats.alloc_count, 0);
    /// ```
    pub fn snapshot(&self) -> AllocStats {
        AllocStats {
            alloc_count: self.alloc_count.load(Ordering::Relaxed),
            total_bytes: self.total_bytes.load(Ordering::Relaxed),
            peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
            current_bytes: self.current_bytes.load(Ordering::Relaxed),
        }
    }

    /// Reset all counters to zero.
    ///
    /// Intended for use at the start of a profile run, before any
    /// outstanding allocations exist. Calling `reset` while
    /// allocations are live can cause `current_bytes` to wrap on
    /// subsequent deallocations; the other counters are unaffected.
    ///
    /// # Example
    ///
    /// ```
    /// use mod_alloc::ModAlloc;
    ///
    /// let alloc = ModAlloc::new();
    /// alloc.reset();
    /// let stats = alloc.snapshot();
    /// assert_eq!(stats.alloc_count, 0);
    /// ```
    pub fn reset(&self) {
        self.alloc_count.store(0, Ordering::Relaxed);
        self.total_bytes.store(0, Ordering::Relaxed);
        self.peak_bytes.store(0, Ordering::Relaxed);
        self.current_bytes.store(0, Ordering::Relaxed);
    }

    #[inline]
    fn record_alloc(&self, size: u64) {
        self.alloc_count.fetch_add(1, Ordering::Relaxed);
        self.total_bytes.fetch_add(size, Ordering::Relaxed);
        let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
        self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
    }

    #[inline]
    fn record_dealloc(&self, size: u64) {
        self.current_bytes.fetch_sub(size, Ordering::Relaxed);
    }

    #[inline]
    fn record_realloc(&self, old_size: u64, new_size: u64) {
        self.alloc_count.fetch_add(1, Ordering::Relaxed);
        if new_size > old_size {
            let delta = new_size - old_size;
            self.total_bytes.fetch_add(delta, Ordering::Relaxed);
            let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
            self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
        } else if new_size < old_size {
            self.current_bytes
                .fetch_sub(old_size - new_size, Ordering::Relaxed);
        }
    }

    #[inline]
    fn register_self(&self) {
        if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
            let _ = GLOBAL_HANDLE.compare_exchange(
                ptr::null_mut(),
                self as *const ModAlloc as *mut ModAlloc,
                Ordering::Release,
                Ordering::Relaxed,
            );
        }
    }

    /// Drain the per-call-site aggregation table into a `Vec`.
    ///
    /// Available only with the `backtraces` cargo feature. The
    /// returned vector contains one [`CallSiteStats`] per unique
    /// call site observed since the table was first written. Each
    /// row carries up to 8 raw return addresses (top of stack
    /// first), the number of allocations attributed to that site,
    /// and the total bytes.
    ///
    /// Symbolication (resolving addresses to function names)
    /// lands in `v0.9.2`. This method exposes raw addresses only.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # #[cfg(feature = "backtraces")]
    /// # fn demo() {
    /// use mod_alloc::ModAlloc;
    ///
    /// #[global_allocator]
    /// static GLOBAL: ModAlloc = ModAlloc::new();
    ///
    /// let _v: Vec<u8> = vec![0; 1024];
    /// for site in GLOBAL.call_sites() {
    ///     println!("{} allocs, {} bytes at {:#x}",
    ///         site.count, site.total_bytes, site.frames[0]);
    /// }
    /// # }
    /// ```
    #[cfg(feature = "backtraces")]
    pub fn call_sites(&self) -> Vec<CallSiteStats> {
        backtrace::call_sites_report()
    }

    /// Drain the per-call-site table and symbolicate each frame
    /// against the running binary's own debug info.
    ///
    /// Available only with the `symbolicate` cargo feature, which
    /// also implies `backtraces`. Returns one
    /// [`SymbolicatedCallSite`] per unique call site, each
    /// carrying resolved function names plus (where available)
    /// source file and line.
    ///
    /// This method allocates, so call it only from ordinary user
    /// code, never from inside the global-allocator hook itself.
    ///
    /// Results are cached per-address across calls.
    ///
    /// # Example
    ///
    /// ```no_run
    /// # #[cfg(feature = "symbolicate")]
    /// # fn demo() {
    /// use mod_alloc::ModAlloc;
    ///
    /// #[global_allocator]
    /// static GLOBAL: ModAlloc = ModAlloc::new();
    ///
    /// let _v: Vec<u8> = vec![0; 1024];
    /// for site in GLOBAL.symbolicated_report() {
    ///     let top = &site.frames[0];
    ///     println!("{} allocs / {} bytes at {}",
    ///         site.count,
    ///         site.total_bytes,
    ///         top.function.as_deref().unwrap_or("<unresolved>"));
    /// }
    /// # }
    /// ```
    #[cfg(feature = "symbolicate")]
    pub fn symbolicated_report(&self) -> Vec<SymbolicatedCallSite> {
        symbolicate::symbolicated_report()
    }
}

impl Default for ModAlloc {
    fn default() -> Self {
        Self::new()
    }
}

// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
// underlying allocation through [`std::alloc::System`]. Each method
// forwards its arguments unchanged to `System` and only inspects
// the result; size/alignment invariants required by the
// `GlobalAlloc` contract are passed through unmodified, so the
// caller's contract to us becomes our contract to System.
//
// The counter-update path uses thread-local reentrancy detection
// (see `ReentryGuard`) so that any allocation triggered transitively
// inside the tracking path bypasses tracking and forwards directly
// to System, preserving the "hook MUST NOT itself allocate"
// invariant from REPS section 4.
unsafe impl GlobalAlloc for ModAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
        // size; we forward unchanged to `System.alloc`, which has
        // the same contract.
        let ptr = unsafe { System.alloc(layout) };
        if !ptr.is_null() {
            if let Some(_g) = ReentryGuard::enter() {
                let size = layout.size() as u64;
                self.record_alloc(size);
                self.register_self();
                #[cfg(feature = "backtraces")]
                backtrace::record_event(size);
            }
        }
        ptr
    }

    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        // SAFETY: same invariants as `alloc`; `layout` forwarded
        // unchanged. `System.alloc_zeroed` zero-fills the returned
        // memory, satisfying the `GlobalAlloc::alloc_zeroed`
        // contract.
        let ptr = unsafe { System.alloc_zeroed(layout) };
        if !ptr.is_null() {
            if let Some(_g) = ReentryGuard::enter() {
                let size = layout.size() as u64;
                self.record_alloc(size);
                self.register_self();
                #[cfg(feature = "backtraces")]
                backtrace::record_event(size);
            }
        }
        ptr
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
        // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
        // allocator with the given `layout`; we forwarded all of
        // those to `System` with the same `layout`, so the inverse
        // pairing for `System.dealloc(ptr, layout)` is valid.
        unsafe { System.dealloc(ptr, layout) };
        if let Some(_g) = ReentryGuard::enter() {
            self.record_dealloc(layout.size() as u64);
        }
    }

    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
        // a prior allocation with `layout`, `new_size` is non-zero,
        // and the alignment in `layout` remains valid for the new
        // size. We forward all three to `System.realloc` which has
        // the same contract.
        let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
        if !new_ptr.is_null() {
            if let Some(_g) = ReentryGuard::enter() {
                self.record_realloc(layout.size() as u64, new_size as u64);
                self.register_self();
                // Per dhat semantics: realloc records as one event
                // attributed to `new_size` (including shrinks).
                #[cfg(feature = "backtraces")]
                backtrace::record_event(new_size as u64);
            }
        }
        new_ptr
    }
}

/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`].
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AllocStats {
    /// Number of allocations performed.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak resident bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}

/// Scoped profiler that captures a delta between start and stop.
///
/// Reads a snapshot of the installed [`ModAlloc`] on construction
/// and again on [`Profiler::stop`], returning the difference. If no
/// `ModAlloc` is installed as `#[global_allocator]`, or no
/// allocation has occurred through it yet, both snapshots are
/// zero and the delta is zero.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::{ModAlloc, Profiler};
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let p = Profiler::start();
///     let v: Vec<u8> = vec![0; 1024];
///     drop(v);
///     let stats = p.stop();
///     println!("Captured {} alloc events", stats.alloc_count);
/// }
/// ```
pub struct Profiler {
    baseline: AllocStats,
}

impl Profiler {
    /// Begin profiling, capturing the current allocation state.
    ///
    /// If no `ModAlloc` is installed as `#[global_allocator]` or no
    /// allocation has occurred yet, the captured baseline is all
    /// zeros.
    ///
    /// # Example
    ///
    /// ```
    /// use mod_alloc::Profiler;
    ///
    /// let p = Profiler::start();
    /// let _delta = p.stop();
    /// ```
    pub fn start() -> Self {
        Self {
            baseline: current_snapshot_or_zeros(),
        }
    }

    /// Stop profiling and return the delta from start.
    ///
    /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
    /// from `start()` to `stop()`. `peak_bytes` is reported as the
    /// absolute high-water mark since the allocator began counting,
    /// not a delta (peak has no meaningful delta semantic).
    ///
    /// # Example
    ///
    /// ```
    /// use mod_alloc::Profiler;
    ///
    /// let p = Profiler::start();
    /// let stats = p.stop();
    /// assert_eq!(stats.alloc_count, 0);
    /// ```
    pub fn stop(self) -> AllocStats {
        let now = current_snapshot_or_zeros();
        AllocStats {
            alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
            total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
            current_bytes: now
                .current_bytes
                .saturating_sub(self.baseline.current_bytes),
            peak_bytes: now.peak_bytes,
        }
    }
}

fn current_snapshot_or_zeros() -> AllocStats {
    let p = GLOBAL_HANDLE.load(Ordering::Acquire);
    if p.is_null() {
        AllocStats {
            alloc_count: 0,
            total_bytes: 0,
            peak_bytes: 0,
            current_bytes: 0,
        }
    } else {
        // SAFETY: `GLOBAL_HANDLE` is only ever set by
        // `ModAlloc::register_self` to point at the address of a
        // `#[global_allocator] static` (or any other `'static`
        // `ModAlloc`). That target has `'static` lifetime, so the
        // pointer remains valid for the remainder of the program.
        // We produce only a shared borrow used to call `&self`
        // methods that read atomic counters; no mutation through
        // the pointer occurs here.
        unsafe { (*p).snapshot() }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocator_constructs() {
        let _ = ModAlloc::new();
    }

    #[test]
    fn snapshot_returns_zeros_initially() {
        let a = ModAlloc::new();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.peak_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }

    #[test]
    fn reset_works() {
        let a = ModAlloc::new();
        a.reset();
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 0);
    }

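    // A sketch of the documented `reset` hazard: resetting while an
    // allocation is live makes the later dealloc wrap `current_bytes`
    // (`AtomicU64::fetch_sub` wraps on underflow).
    #[test]
    fn reset_with_live_allocation_wraps_current() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.reset();
        a.record_dealloc(100);
        let s = a.snapshot();
        // 0u64 wrapping-minus 100 == u64::MAX - 99.
        assert_eq!(s.current_bytes, u64::MAX - 99);
    }
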
    #[test]
    fn record_alloc_updates_counters() {
        let a = ModAlloc::new();
        a.record_alloc(128);
        a.record_alloc(256);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 384);
        assert_eq!(s.current_bytes, 384);
        assert_eq!(s.peak_bytes, 384);
    }

    #[test]
    fn record_dealloc_decreases_current_only() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(400);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 1);
        assert_eq!(s.total_bytes, 1000);
        assert_eq!(s.current_bytes, 600);
        assert_eq!(s.peak_bytes, 1000);
    }

    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.record_realloc(100, 250);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 250);
        assert_eq!(s.current_bytes, 250);
        assert_eq!(s.peak_bytes, 250);
    }

    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let a = ModAlloc::new();
        a.record_alloc(500);
        a.record_realloc(500, 200);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 500);
        assert_eq!(s.current_bytes, 200);
        assert_eq!(s.peak_bytes, 500);
    }

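    // A sketch covering the remaining realloc case: an equal-size
    // realloc takes neither branch in `record_realloc`, so it counts
    // one event and leaves every byte counter unchanged.
    #[test]
    fn record_realloc_equal_size_counts_event_only() {
        let a = ModAlloc::new();
        a.record_alloc(300);
        a.record_realloc(300, 300);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 300);
        assert_eq!(s.current_bytes, 300);
        assert_eq!(s.peak_bytes, 300);
    }
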
    #[test]
    fn peak_holds_high_water_mark() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(1000);
        a.record_alloc(500);
        let s = a.snapshot();
        assert_eq!(s.peak_bytes, 1000);
        assert_eq!(s.current_bytes, 500);
    }

    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let outer = ReentryGuard::enter();
        assert!(outer.is_some());
        let inner = ReentryGuard::enter();
        assert!(inner.is_none(), "nested entry must be denied");
        drop(outer);
        let after = ReentryGuard::enter();
        assert!(after.is_some(), "entry must be allowed after outer drops");
    }

    #[test]
    fn profiler_start_stop_with_no_handle() {
        let p = Profiler::start();
        let s = p.stop();
        assert_eq!(s.alloc_count, 0);
    }
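
    // A sketch of the saturating delta semantics: with no installed
    // handle, the "now" snapshot is all zeros, so a nonzero baseline
    // saturates to zero rather than underflowing.
    #[test]
    fn profiler_delta_saturates_instead_of_underflowing() {
        let p = Profiler {
            baseline: AllocStats {
                alloc_count: 5,
                total_bytes: 100,
                peak_bytes: 100,
                current_bytes: 50,
            },
        };
        let s = p.stop();
        assert_eq!(s.alloc_count, 0);
        assert_eq!(s.total_bytes, 0);
        assert_eq!(s.current_bytes, 0);
    }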
}