mod_alloc/lib.rs
1//! # mod-alloc
2//!
//! Allocation profiling for Rust. Tracks allocation counts, total
//! bytes requested, and the peak and current number of live
//! (allocated but not yet freed) bytes by wrapping the system
//! allocator via [`GlobalAlloc`].
6//!
7//! Designed as a lean replacement for `dhat` with MSRV 1.75 and
8//! zero external dependencies on the hot path.
9//!
10//! ## Installing as the global allocator
11//!
//! ```no_run
//! use mod_alloc::{ModAlloc, Profiler};
//!
//! #[global_allocator]
//! static GLOBAL: ModAlloc = ModAlloc::new();
//!
//! fn main() {
//!     let p = Profiler::start();
//!
//!     let v: Vec<u64> = (0..1000).collect();
//!     drop(v);
//!
//!     let stats = p.stop();
//!     println!("Allocations: {}", stats.alloc_count);
//!     println!("Total bytes: {}", stats.total_bytes);
//!     println!("Peak bytes (absolute): {}", stats.peak_bytes);
//! }
//! ```
30//!
31//! ## Counter semantics
32//!
33//! The four Tier 1 counters track allocator activity since the
34//! installed [`ModAlloc`] began counting (or since the last
35//! [`ModAlloc::reset`] call):
36//!
37//! | Counter | Updated on `alloc` | Updated on `dealloc` |
38//! |-----------------|-------------------------------|----------------------|
39//! | `alloc_count` | `+= 1` | (unchanged) |
40//! | `total_bytes` | `+= size` | (unchanged) |
41//! | `current_bytes` | `+= size` | `-= size` |
42//! | `peak_bytes` | high-water mark of `current` | (unchanged) |
43//!
44//! `realloc` is counted as one allocation event. `total_bytes`
45//! increases by the growth delta on a growing realloc and is
46//! unchanged on a shrinking realloc.
47//!
48//! ## Status
49//!
50//! v0.9.1 adds Tier 2 (inline backtrace capture) behind the
51//! `backtraces` feature. Default builds still ship Tier 1
52//! counters only. Tier 3 (DHAT-compatible JSON output) lands in
53//! v0.9.3.
54//!
55//! ## Backtraces (`backtraces` feature)
56//!
57//! With `mod-alloc = { version = "0.9", features = ["backtraces"] }`
58//! and `RUSTFLAGS="-C force-frame-pointers=yes"`, each tracked
59//! allocation captures up to 8 frames of its call site via inline
60//! frame-pointer walking on `x86_64` and `aarch64`. Per-call-site
61//! aggregation is exposed via `ModAlloc::call_sites` (available
62//! only with the `backtraces` feature); the result is raw return
63//! addresses. Symbolication ships in v0.9.2.
64//!
65//! Aggregation-table size is controlled by the `MOD_ALLOC_BUCKETS`
66//! environment variable at process start (default 4,096 buckets,
67//! ~384 KB).
68
69#![cfg_attr(docsrs, feature(doc_cfg))]
70#![warn(missing_docs)]
71#![warn(rust_2018_idioms)]
72
73#[cfg(feature = "backtraces")]
74mod backtrace;
75
76#[cfg(feature = "backtraces")]
77pub use backtrace::CallSiteStats;
78
79use std::alloc::{GlobalAlloc, Layout, System};
80use std::cell::Cell;
81use std::ptr;
82use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};
83
84// Process-wide handle to the installed `ModAlloc`. Populated lazily
85// on the first non-reentrant alloc call. `Profiler` reads from this
86// to locate the canonical counters without requiring an explicit
87// registration call from the user.
88static GLOBAL_HANDLE: AtomicPtr<ModAlloc> = AtomicPtr::new(ptr::null_mut());
89
thread_local! {
    // Per-thread "currently inside the tracking path" marker. While it
    // is `true`, an allocation made on this thread is a nested call
    // coming from the bookkeeping itself; such calls skip tracking and
    // go straight to the System allocator.
    //
    // The `const { .. }` initializer form (stable since Rust 1.59)
    // means accessing the flag never runs a lazy constructor, so the
    // TLS access path cannot itself allocate.
    static IN_ALLOC: Cell<bool> = const { Cell::new(false) };
}

// Scope guard over `IN_ALLOC`. Holding a `ReentryGuard` means this
// thread owns the tracking path; dropping it hands the path back by
// clearing the flag.
struct ReentryGuard;

impl ReentryGuard {
    // Try to claim the tracking path for the current thread.
    //
    // Returns `None` when the flag was already set (the caller is a
    // nested allocation and must skip counter updates) or when the
    // thread-local cannot be accessed at all (`try_with` fails, e.g.
    // during thread teardown). Returns `Some(guard)` otherwise, with
    // the flag now set.
    fn enter() -> Option<Self> {
        // `replace` sets the flag and reports what it was before. If it
        // was already `true`, writing `true` again changes nothing.
        match IN_ALLOC.try_with(|in_alloc| in_alloc.replace(true)) {
            Ok(false) => Some(ReentryGuard),
            Ok(true) | Err(_) => None,
        }
    }
}

impl Drop for ReentryGuard {
    // Release the tracking path. If the thread-local is no longer
    // accessible there is nothing left to clear, so the error is
    // deliberately ignored.
    fn drop(&mut self) {
        let _ = IN_ALLOC.try_with(|in_alloc| in_alloc.set(false));
    }
}
127
/// Global allocator wrapper that tracks allocations.
///
/// Install as `#[global_allocator]` to enable tracking. The wrapper
/// forwards every allocation, deallocation, reallocation, and
/// zero-initialised allocation to [`std::alloc::System`] and records
/// the event in four lock-free [`AtomicU64`] counters.
///
/// `ModAlloc` implements [`Debug`](std::fmt::Debug); the output shows
/// the value of each counter at the moment of formatting.
///
/// # Example
///
/// ```no_run
/// use mod_alloc::ModAlloc;
///
/// #[global_allocator]
/// static GLOBAL: ModAlloc = ModAlloc::new();
///
/// fn main() {
///     let v: Vec<u8> = vec![0; 1024];
///     let stats = GLOBAL.snapshot();
///     assert!(stats.alloc_count >= 1);
///     drop(v);
/// }
/// ```
#[derive(Debug)]
pub struct ModAlloc {
    /// Number of tracked allocation events (allocations and
    /// reallocations).
    alloc_count: AtomicU64,
    /// Cumulative bytes requested: full size per allocation, growth
    /// delta per growing reallocation.
    total_bytes: AtomicU64,
    /// Highest value `current_bytes` has reached.
    peak_bytes: AtomicU64,
    /// Bytes currently live (allocated and not yet freed).
    current_bytes: AtomicU64,
}
156
157impl ModAlloc {
158 /// Construct a new `ModAlloc` allocator wrapper.
159 ///
160 /// All counters start at zero. This function is `const`, which
161 /// allows construction in a `static` for use as
162 /// `#[global_allocator]`.
163 ///
164 /// # Example
165 ///
166 /// ```
167 /// use mod_alloc::ModAlloc;
168 ///
169 /// static GLOBAL: ModAlloc = ModAlloc::new();
170 /// let stats = GLOBAL.snapshot();
171 /// assert_eq!(stats.alloc_count, 0);
172 /// ```
173 pub const fn new() -> Self {
174 Self {
175 alloc_count: AtomicU64::new(0),
176 total_bytes: AtomicU64::new(0),
177 peak_bytes: AtomicU64::new(0),
178 current_bytes: AtomicU64::new(0),
179 }
180 }
181
182 /// Snapshot the current counter values.
183 ///
184 /// Each counter is read independently with `Relaxed` ordering;
185 /// the resulting [`AllocStats`] is a coherent best-effort view
186 /// but does not represent a single atomic moment in time. For
187 /// scoped measurement, prefer [`Profiler`].
188 ///
189 /// # Example
190 ///
191 /// ```
192 /// use mod_alloc::ModAlloc;
193 ///
194 /// let alloc = ModAlloc::new();
195 /// let stats = alloc.snapshot();
196 /// assert_eq!(stats.alloc_count, 0);
197 /// ```
198 pub fn snapshot(&self) -> AllocStats {
199 AllocStats {
200 alloc_count: self.alloc_count.load(Ordering::Relaxed),
201 total_bytes: self.total_bytes.load(Ordering::Relaxed),
202 peak_bytes: self.peak_bytes.load(Ordering::Relaxed),
203 current_bytes: self.current_bytes.load(Ordering::Relaxed),
204 }
205 }
206
207 /// Reset all counters to zero.
208 ///
209 /// Intended for use at the start of a profile run, before any
210 /// outstanding allocations exist. Calling `reset` while
211 /// allocations are live can cause `current_bytes` to wrap on
212 /// subsequent deallocations; the other counters are unaffected.
213 ///
214 /// # Example
215 ///
216 /// ```
217 /// use mod_alloc::ModAlloc;
218 ///
219 /// let alloc = ModAlloc::new();
220 /// alloc.reset();
221 /// let stats = alloc.snapshot();
222 /// assert_eq!(stats.alloc_count, 0);
223 /// ```
224 pub fn reset(&self) {
225 self.alloc_count.store(0, Ordering::Relaxed);
226 self.total_bytes.store(0, Ordering::Relaxed);
227 self.peak_bytes.store(0, Ordering::Relaxed);
228 self.current_bytes.store(0, Ordering::Relaxed);
229 }
230
231 #[inline]
232 fn record_alloc(&self, size: u64) {
233 self.alloc_count.fetch_add(1, Ordering::Relaxed);
234 self.total_bytes.fetch_add(size, Ordering::Relaxed);
235 let new_current = self.current_bytes.fetch_add(size, Ordering::Relaxed) + size;
236 self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
237 }
238
239 #[inline]
240 fn record_dealloc(&self, size: u64) {
241 self.current_bytes.fetch_sub(size, Ordering::Relaxed);
242 }
243
244 #[inline]
245 fn record_realloc(&self, old_size: u64, new_size: u64) {
246 self.alloc_count.fetch_add(1, Ordering::Relaxed);
247 if new_size > old_size {
248 let delta = new_size - old_size;
249 self.total_bytes.fetch_add(delta, Ordering::Relaxed);
250 let new_current = self.current_bytes.fetch_add(delta, Ordering::Relaxed) + delta;
251 self.peak_bytes.fetch_max(new_current, Ordering::Relaxed);
252 } else if new_size < old_size {
253 self.current_bytes
254 .fetch_sub(old_size - new_size, Ordering::Relaxed);
255 }
256 }
257
258 #[inline]
259 fn register_self(&self) {
260 if GLOBAL_HANDLE.load(Ordering::Relaxed).is_null() {
261 let _ = GLOBAL_HANDLE.compare_exchange(
262 ptr::null_mut(),
263 self as *const ModAlloc as *mut ModAlloc,
264 Ordering::Release,
265 Ordering::Relaxed,
266 );
267 }
268 }
269
270 /// Drain the per-call-site aggregation table into a `Vec`.
271 ///
272 /// Available only with the `backtraces` cargo feature. The
273 /// returned vector contains one [`CallSiteStats`] per unique
274 /// call site observed since the table was first written. Each
275 /// row carries up to 8 raw return addresses (top of stack
276 /// first), the number of allocations attributed to that site,
277 /// and the total bytes.
278 ///
279 /// Symbolication (resolving addresses to function names)
280 /// lands in `v0.9.2`. This method exposes raw addresses only.
281 ///
282 /// # Example
283 ///
284 /// ```no_run
285 /// # #[cfg(feature = "backtraces")]
286 /// # fn demo() {
287 /// use mod_alloc::ModAlloc;
288 ///
289 /// #[global_allocator]
290 /// static GLOBAL: ModAlloc = ModAlloc::new();
291 ///
292 /// let _v: Vec<u8> = vec![0; 1024];
293 /// for site in GLOBAL.call_sites() {
294 /// println!("{} allocs, {} bytes at {:#x}",
295 /// site.count, site.total_bytes, site.frames[0]);
296 /// }
297 /// # }
298 /// ```
299 #[cfg(feature = "backtraces")]
300 pub fn call_sites(&self) -> Vec<CallSiteStats> {
301 backtrace::call_sites_report()
302 }
303}
304
305impl Default for ModAlloc {
306 fn default() -> Self {
307 Self::new()
308 }
309}
310
311// SAFETY: `ModAlloc` adds counter bookkeeping but performs all
312// underlying allocation through [`std::alloc::System`]. Each method
313// forwards its arguments unchanged to `System` and only inspects
314// the result; size/alignment invariants required by the
315// `GlobalAlloc` contract are passed through unmodified, so the
316// caller's contract to us becomes our contract to System.
317//
318// The counter-update path uses thread-local reentrancy detection
319// (see `ReentryGuard`) so that any allocation triggered transitively
320// inside the tracking path bypasses tracking and forwards directly
321// to System, preserving the "hook MUST NOT itself allocate"
322// invariant from REPS section 4.
323unsafe impl GlobalAlloc for ModAlloc {
324 unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
325 // SAFETY: per `GlobalAlloc::alloc`, `layout` has non-zero
326 // size; we forward unchanged to `System.alloc`, which has
327 // the same contract.
328 let ptr = unsafe { System.alloc(layout) };
329 if !ptr.is_null() {
330 if let Some(_g) = ReentryGuard::enter() {
331 let size = layout.size() as u64;
332 self.record_alloc(size);
333 self.register_self();
334 #[cfg(feature = "backtraces")]
335 backtrace::record_event(size);
336 }
337 }
338 ptr
339 }
340
341 unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
342 // SAFETY: same invariants as `alloc`; `layout` forwarded
343 // unchanged. `System.alloc_zeroed` zero-fills the returned
344 // memory, satisfying the `GlobalAlloc::alloc_zeroed`
345 // contract.
346 let ptr = unsafe { System.alloc_zeroed(layout) };
347 if !ptr.is_null() {
348 if let Some(_g) = ReentryGuard::enter() {
349 let size = layout.size() as u64;
350 self.record_alloc(size);
351 self.register_self();
352 #[cfg(feature = "backtraces")]
353 backtrace::record_event(size);
354 }
355 }
356 ptr
357 }
358
359 unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
360 // SAFETY: per `GlobalAlloc::dealloc`, `ptr` was returned by a
361 // prior call to `alloc`/`alloc_zeroed`/`realloc` on this
362 // allocator with the given `layout`; we forwarded all of
363 // those to `System` with the same `layout`, so the inverse
364 // pairing for `System.dealloc(ptr, layout)` is valid.
365 unsafe { System.dealloc(ptr, layout) };
366 if let Some(_g) = ReentryGuard::enter() {
367 self.record_dealloc(layout.size() as u64);
368 }
369 }
370
371 unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
372 // SAFETY: per `GlobalAlloc::realloc`, `ptr` was returned by
373 // a prior allocation with `layout`, `new_size` is non-zero,
374 // and the alignment in `layout` remains valid for the new
375 // size. We forward all three to `System.realloc` which has
376 // the same contract.
377 let new_ptr = unsafe { System.realloc(ptr, layout, new_size) };
378 if !new_ptr.is_null() {
379 if let Some(_g) = ReentryGuard::enter() {
380 self.record_realloc(layout.size() as u64, new_size as u64);
381 self.register_self();
382 // Per dhat semantics: realloc records as one event
383 // attributed to `new_size` (including shrinks).
384 #[cfg(feature = "backtraces")]
385 backtrace::record_event(new_size as u64);
386 }
387 }
388 new_ptr
389 }
390}
391
/// Snapshot of allocation statistics at a point in time.
///
/// Produced by [`ModAlloc::snapshot`] and [`Profiler::stop`]. The
/// [`Default`] value has every counter at zero.
///
/// # Example
///
/// ```
/// use mod_alloc::AllocStats;
///
/// let stats = AllocStats {
///     alloc_count: 10,
///     total_bytes: 1024,
///     peak_bytes: 512,
///     current_bytes: 256,
/// };
/// assert_eq!(stats.alloc_count, 10);
/// assert_eq!(AllocStats::default().total_bytes, 0);
/// ```
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AllocStats {
    /// Number of allocation events. Each reallocation counts as one
    /// event as well.
    pub alloc_count: u64,
    /// Total bytes allocated across all allocations. Reallocations
    /// contribute the growth delta (or zero on shrink).
    pub total_bytes: u64,
    /// Peak live bytes (highest `current_bytes` ever observed).
    pub peak_bytes: u64,
    /// Currently-allocated bytes (allocations minus deallocations).
    pub current_bytes: u64,
}
421
422/// Scoped profiler that captures a delta between start and stop.
423///
424/// Read the snapshot of the installed [`ModAlloc`] on construction
425/// and again on [`Profiler::stop`], returning the difference. If no
426/// `ModAlloc` is installed as `#[global_allocator]` and no
427/// allocation has occurred through it yet, both snapshots are
428/// zero and the delta is zero.
429///
430/// # Example
431///
432/// ```no_run
433/// use mod_alloc::{ModAlloc, Profiler};
434///
435/// #[global_allocator]
436/// static GLOBAL: ModAlloc = ModAlloc::new();
437///
438/// fn main() {
439/// let p = Profiler::start();
440/// let v: Vec<u8> = vec![0; 1024];
441/// drop(v);
442/// let stats = p.stop();
443/// println!("Captured {} alloc events", stats.alloc_count);
444/// }
445/// ```
446pub struct Profiler {
447 baseline: AllocStats,
448}
449
450impl Profiler {
451 /// Begin profiling, capturing the current allocation state.
452 ///
453 /// If no `ModAlloc` is installed as `#[global_allocator]` or no
454 /// allocation has occurred yet, the captured baseline is all
455 /// zeros.
456 ///
457 /// # Example
458 ///
459 /// ```
460 /// use mod_alloc::Profiler;
461 ///
462 /// let p = Profiler::start();
463 /// let _delta = p.stop();
464 /// ```
465 pub fn start() -> Self {
466 Self {
467 baseline: current_snapshot_or_zeros(),
468 }
469 }
470
471 /// Stop profiling and return the delta from start.
472 ///
473 /// `alloc_count`, `total_bytes`, and `current_bytes` are deltas
474 /// from `start()` to `stop()`. `peak_bytes` is the absolute
475 /// high-water mark observed during the profiling window (peak
476 /// has no meaningful delta semantic).
477 ///
478 /// # Example
479 ///
480 /// ```
481 /// use mod_alloc::Profiler;
482 ///
483 /// let p = Profiler::start();
484 /// let stats = p.stop();
485 /// assert_eq!(stats.alloc_count, 0);
486 /// ```
487 pub fn stop(self) -> AllocStats {
488 let now = current_snapshot_or_zeros();
489 AllocStats {
490 alloc_count: now.alloc_count.saturating_sub(self.baseline.alloc_count),
491 total_bytes: now.total_bytes.saturating_sub(self.baseline.total_bytes),
492 current_bytes: now
493 .current_bytes
494 .saturating_sub(self.baseline.current_bytes),
495 peak_bytes: now.peak_bytes,
496 }
497 }
498}
499
500fn current_snapshot_or_zeros() -> AllocStats {
501 let p = GLOBAL_HANDLE.load(Ordering::Acquire);
502 if p.is_null() {
503 AllocStats {
504 alloc_count: 0,
505 total_bytes: 0,
506 peak_bytes: 0,
507 current_bytes: 0,
508 }
509 } else {
510 // SAFETY: `GLOBAL_HANDLE` is only ever set by
511 // `ModAlloc::register_self` to point at the address of a
512 // `#[global_allocator] static` (or any other `'static`
513 // `ModAlloc`). That target has `'static` lifetime, so the
514 // pointer remains valid for the remainder of the program.
515 // We produce only a shared borrow used to call `&self`
516 // methods that read atomic counters; no mutation through
517 // the pointer occurs here.
518 unsafe { (*p).snapshot() }
519 }
520}
521
// Unit tests for the counter bookkeeping, the reentrancy guard, and
// the profiler. They call the private `record_*` helpers directly, so
// nothing here depends on `ModAlloc` being installed as the global
// allocator.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn allocator_constructs() {
        let _ = ModAlloc::new();
    }

    #[test]
    fn snapshot_returns_zeros_initially() {
        let stats = ModAlloc::new().snapshot();
        assert_eq!(stats.alloc_count, 0);
        assert_eq!(stats.total_bytes, 0);
        assert_eq!(stats.peak_bytes, 0);
        assert_eq!(stats.current_bytes, 0);
    }

    #[test]
    fn reset_works() {
        let a = ModAlloc::new();
        // Make every counter non-zero first; resetting a fresh
        // allocator would pass even if `reset` did nothing.
        a.record_alloc(64);
        a.record_dealloc(16);
        let before = a.snapshot();
        assert_eq!(before.alloc_count, 1);
        assert_eq!(before.total_bytes, 64);
        assert_eq!(before.peak_bytes, 64);
        assert_eq!(before.current_bytes, 48);

        a.reset();

        let after = a.snapshot();
        assert_eq!(after.alloc_count, 0);
        assert_eq!(after.total_bytes, 0);
        assert_eq!(after.peak_bytes, 0);
        assert_eq!(after.current_bytes, 0);
    }

    #[test]
    fn record_alloc_updates_counters() {
        let a = ModAlloc::new();
        a.record_alloc(128);
        a.record_alloc(256);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 384);
        assert_eq!(s.current_bytes, 384);
        assert_eq!(s.peak_bytes, 384);
    }

    #[test]
    fn record_dealloc_decreases_current_only() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(400);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 1);
        assert_eq!(s.total_bytes, 1000);
        assert_eq!(s.current_bytes, 600);
        assert_eq!(s.peak_bytes, 1000);
    }

    #[test]
    fn record_realloc_growth_updates_total_and_peak() {
        let a = ModAlloc::new();
        a.record_alloc(100);
        a.record_realloc(100, 250);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 250);
        assert_eq!(s.current_bytes, 250);
        assert_eq!(s.peak_bytes, 250);
    }

    #[test]
    fn record_realloc_shrink_only_adjusts_current() {
        let a = ModAlloc::new();
        a.record_alloc(500);
        a.record_realloc(500, 200);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 500);
        assert_eq!(s.current_bytes, 200);
        assert_eq!(s.peak_bytes, 500);
    }

    #[test]
    fn record_realloc_same_size_only_counts_the_event() {
        let a = ModAlloc::new();
        a.record_alloc(300);
        a.record_realloc(300, 300);
        let s = a.snapshot();
        assert_eq!(s.alloc_count, 2);
        assert_eq!(s.total_bytes, 300);
        assert_eq!(s.current_bytes, 300);
        assert_eq!(s.peak_bytes, 300);
    }

    #[test]
    fn peak_holds_high_water_mark() {
        let a = ModAlloc::new();
        a.record_alloc(1000);
        a.record_dealloc(1000);
        a.record_alloc(500);
        let s = a.snapshot();
        assert_eq!(s.peak_bytes, 1000);
        assert_eq!(s.current_bytes, 500);
    }

    #[test]
    fn reentry_guard_blocks_nested_entry() {
        let outer = ReentryGuard::enter();
        assert!(outer.is_some());
        let inner = ReentryGuard::enter();
        assert!(inner.is_none(), "nested entry must be denied");
        drop(outer);
        let after = ReentryGuard::enter();
        assert!(after.is_some(), "entry must be allowed after outer drops");
    }

    #[test]
    fn profiler_start_stop_with_no_handle() {
        // No `ModAlloc` is installed in the test binary, so the handle
        // stays null and both snapshots are zero.
        let p = Profiler::start();
        let s = p.stop();
        assert_eq!(s.alloc_count, 0);
    }
}