Skip to main content

smelt_perf/
alloc.rs

1//! Counting global-allocator shim. Process-wide counters always update so peak / current /
2//! dealloc stats stay consistent across the whole run; per-thread tallies (used by
3//! [`crate::perf::Guard`]) only update when [`enable`] has been called. Install via
4//! `#[global_allocator]` in the binary crate.
5
6use std::alloc::{GlobalAlloc, Layout, System};
7use std::cell::Cell;
8use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering::Relaxed};
9
/// Gate for the per-thread tallies below. The process-wide counters ignore it.
static ENABLED: AtomicBool = AtomicBool::new(false);

// ---- Process-wide counters (updated by the `Counting` allocator) ----

/// Successful `alloc` / `alloc_zeroed` calls.
static ALLOC_COUNT: AtomicU64 = AtomicU64::new(0);
/// `dealloc` calls.
static DEALLOC_COUNT: AtomicU64 = AtomicU64::new(0);
/// Successful `realloc` calls.
static REALLOC_COUNT: AtomicU64 = AtomicU64::new(0);
/// Bytes handed out: fresh allocations plus the growth of each `realloc`.
static BYTES_ALLOCATED: AtomicU64 = AtomicU64::new(0);
/// Bytes given back: freed blocks plus the shrinkage of each `realloc`.
static BYTES_DEALLOCATED: AtomicU64 = AtomicU64::new(0);
/// Bytes currently live.
static CURRENT_BYTES: AtomicUsize = AtomicUsize::new(0);
/// Highest value `CURRENT_BYTES` has been observed to reach.
static PEAK_BYTES: AtomicUsize = AtomicUsize::new(0);

// ---- Per-thread tallies (only updated while `ENABLED` is set) ----

thread_local! {
    /// Allocation and reallocation calls made on this thread.
    static T_ALLOCS: Cell<u64> = const { Cell::new(0) };
}

thread_local! {
    /// Bytes requested by allocations, plus realloc growth, on this thread.
    static T_BYTES: Cell<u64> = const { Cell::new(0) };
}
24
25pub fn enable() {
26    ENABLED.store(true, Relaxed);
27}
28
29pub fn enabled() -> bool {
30    ENABLED.load(Relaxed)
31}
32
33pub fn set_enabled(on: bool) {
34    ENABLED.store(on, Relaxed);
35}
36
37/// Calling-thread `(alloc_count, alloc_bytes_grown)` totals. Monotonic; take deltas.
38/// Used by [`crate::perf::Guard`] to attribute allocs to the thread doing the work.
39pub fn thread_snapshot() -> (u64, u64) {
40    let a = T_ALLOCS.try_with(|c| c.get()).unwrap_or(0);
41    let b = T_BYTES.try_with(|c| c.get()).unwrap_or(0);
42    (a, b)
43}
44
/// Plain-data copy of the process-wide allocation counters at one point in time.
#[derive(Clone, Copy, Debug, Default)]
pub struct AllocStats {
    /// Number of successful `alloc` / `alloc_zeroed` calls.
    pub allocs: u64,
    /// Number of `dealloc` calls.
    pub deallocs: u64,
    /// Number of successful `realloc` calls.
    pub reallocs: u64,
    /// Total bytes handed out, including growth from `realloc`.
    pub bytes_allocated: u64,
    /// Total bytes given back, including shrinkage from `realloc`.
    pub bytes_deallocated: u64,
    /// Bytes live at the moment the value was captured.
    pub current_bytes: usize,
    /// Highest live-byte figure recorded up to the moment of capture.
    pub peak_bytes: usize,
}
55
56/// Process-wide cumulative allocation stats. Subtract two snapshots for a phase delta.
57pub fn snapshot() -> AllocStats {
58    AllocStats {
59        allocs: ALLOC_COUNT.load(Relaxed),
60        deallocs: DEALLOC_COUNT.load(Relaxed),
61        reallocs: REALLOC_COUNT.load(Relaxed),
62        bytes_allocated: BYTES_ALLOCATED.load(Relaxed),
63        bytes_deallocated: BYTES_DEALLOCATED.load(Relaxed),
64        current_bytes: CURRENT_BYTES.load(Relaxed),
65        peak_bytes: PEAK_BYTES.load(Relaxed),
66    }
67}
68
69pub fn delta(start: AllocStats, end: AllocStats) -> AllocStats {
70    AllocStats {
71        allocs: end.allocs.saturating_sub(start.allocs),
72        deallocs: end.deallocs.saturating_sub(start.deallocs),
73        reallocs: end.reallocs.saturating_sub(start.reallocs),
74        bytes_allocated: end.bytes_allocated.saturating_sub(start.bytes_allocated),
75        bytes_deallocated: end
76            .bytes_deallocated
77            .saturating_sub(start.bytes_deallocated),
78        current_bytes: end.current_bytes,
79        peak_bytes: end.peak_bytes,
80    }
81}
82
83pub struct Counting;
84
85unsafe impl GlobalAlloc for Counting {
86    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
87        let p = unsafe { System.alloc(layout) };
88        if !p.is_null() {
89            let size = layout.size();
90            ALLOC_COUNT.fetch_add(1, Relaxed);
91            BYTES_ALLOCATED.fetch_add(size as u64, Relaxed);
92            let cur = CURRENT_BYTES.fetch_add(size, Relaxed) + size;
93            update_peak(cur);
94            if ENABLED.load(Relaxed) {
95                // `try_with` because the allocator can run during TLS teardown.
96                let _ = T_ALLOCS.try_with(|c| c.set(c.get() + 1));
97                let _ = T_BYTES.try_with(|c| c.set(c.get() + size as u64));
98            }
99        }
100        p
101    }
102
103    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
104        unsafe { System.dealloc(ptr, layout) };
105        DEALLOC_COUNT.fetch_add(1, Relaxed);
106        BYTES_DEALLOCATED.fetch_add(layout.size() as u64, Relaxed);
107        CURRENT_BYTES.fetch_sub(layout.size(), Relaxed);
108    }
109
110    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
111        let p = unsafe { System.alloc_zeroed(layout) };
112        if !p.is_null() {
113            let size = layout.size();
114            ALLOC_COUNT.fetch_add(1, Relaxed);
115            BYTES_ALLOCATED.fetch_add(size as u64, Relaxed);
116            let cur = CURRENT_BYTES.fetch_add(size, Relaxed) + size;
117            update_peak(cur);
118            if ENABLED.load(Relaxed) {
119                let _ = T_ALLOCS.try_with(|c| c.set(c.get() + 1));
120                let _ = T_BYTES.try_with(|c| c.set(c.get() + size as u64));
121            }
122        }
123        p
124    }
125
126    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
127        let p = unsafe { System.realloc(ptr, layout, new_size) };
128        if !p.is_null() {
129            REALLOC_COUNT.fetch_add(1, Relaxed);
130            let old = layout.size();
131            if new_size >= old {
132                let grown = new_size - old;
133                BYTES_ALLOCATED.fetch_add(grown as u64, Relaxed);
134                let cur = CURRENT_BYTES.fetch_add(grown, Relaxed) + grown;
135                update_peak(cur);
136                if ENABLED.load(Relaxed) {
137                    let _ = T_ALLOCS.try_with(|c| c.set(c.get() + 1));
138                    let _ = T_BYTES.try_with(|c| c.set(c.get() + grown as u64));
139                }
140            } else {
141                let shrunk = old - new_size;
142                BYTES_DEALLOCATED.fetch_add(shrunk as u64, Relaxed);
143                CURRENT_BYTES.fetch_sub(shrunk, Relaxed);
144                if ENABLED.load(Relaxed) {
145                    let _ = T_ALLOCS.try_with(|c| c.set(c.get() + 1));
146                }
147            }
148        }
149        p
150    }
151}
152
153fn update_peak(cur: usize) {
154    let mut peak = PEAK_BYTES.load(Relaxed);
155    while cur > peak {
156        match PEAK_BYTES.compare_exchange_weak(peak, cur, Relaxed, Relaxed) {
157            Ok(_) => break,
158            Err(p) => peak = p,
159        }
160    }
161}