Skip to main content

tracing_prof/
allocator.rs

1//! An allocator that enables tracking of allocations and deallocations
2//! for allocation groups (e.g. spans).
3
4use std::{
5    alloc::{GlobalAlloc, Layout},
6    cell::{Cell, UnsafeCell},
7    mem,
8    sync::{Arc, OnceLock},
9};
10
11use heapless::FnvIndexMap;
12use tracing_core::span;
13
14use crate::events::{EventQueue, SpanMemoryUpdateEvent};
15
/// The maximum number of allocation groups that a thread can track locally
/// before its statistics are flushed to the global tracker.
///
/// This is the capacity of [`ThreadGroupStatisticsMap`]. Since we flush fairly
/// often, we can afford to have a small number of groups tracked at a time.
const MAX_GROUPS: usize = 4;

/// The maximum depth of the per-thread allocation group stack.
///
/// Entering a group while the stack is already full logs a warning and leaves
/// the current group unchanged.
const GROUP_STACK_SIZE: usize = 128;
25
/// An allocation group identifier.
///
/// The value `0` is reserved for "no group": allocations made while it is the
/// current group are tagged with it but never recorded.
///
/// Internal use only, only exposed for testing purposes.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[doc(hidden)]
pub struct AllocationGroup(u64);
32
33impl From<&span::Id> for AllocationGroup {
34    #[inline]
35    fn from(id: &span::Id) -> Self {
36        AllocationGroup(id.into_u64())
37    }
38}
39
40impl From<span::Id> for AllocationGroup {
41    #[inline]
42    fn from(id: span::Id) -> Self {
43        AllocationGroup(id.into_u64())
44    }
45}
46
47impl From<AllocationGroup> for span::Id {
48    #[inline]
49    fn from(group: AllocationGroup) -> Self {
50        span::Id::from_u64(group.0)
51    }
52}
53
54impl AllocationGroup {
55    #[inline]
56    #[must_use]
57    const fn should_track(self) -> bool {
58        self.0 != 0
59    }
60}
61
/// Fixed-capacity map from an allocation group to the statistics recorded for
/// it, holding at most [`MAX_GROUPS`] entries.
pub(crate) type ThreadGroupStatisticsMap =
    FnvIndexMap<AllocationGroup, GroupAllocationStatistics, MAX_GROUPS>;
64
/// Allocation statistics for an allocation group (e.g. a span).
///
/// Allocations and deallocations are counted separately. A value of this type
/// is a snapshot of what was recorded since the last flush, so it may contain
/// allocations without deallocations and also deallocations without
/// allocations (a deallocation is recorded even when no allocation for the
/// group is present in the map). The `in_use_*` helpers therefore saturate at
/// zero instead of underflowing.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct GroupAllocationStatistics {
    /// The number of allocations for this group.
    pub(crate) allocation_count: u64,
    /// The number of deallocations for this group.
    pub(crate) deallocation_count: u64,
    /// The number of bytes allocated for this group.
    pub(crate) allocated_bytes: u64,
    /// The number of bytes deallocated for this group.
    pub(crate) deallocated_bytes: u64,
}

impl GroupAllocationStatistics {
    /// The number of allocations that have not been matched by a deallocation.
    ///
    /// Returns `0` when more deallocations than allocations were recorded.
    #[must_use]
    pub(crate) const fn in_use_count(&self) -> u64 {
        self.allocation_count
            .saturating_sub(self.deallocation_count)
    }

    /// The number of allocated bytes that have not been deallocated.
    ///
    /// Returns `0` when more bytes were deallocated than allocated.
    #[must_use]
    pub(crate) const fn in_use_bytes(&self) -> u64 {
        self.allocated_bytes.saturating_sub(self.deallocated_bytes)
    }
}
96
/// A tracker that collects allocation statistics from threads.
pub(crate) trait AllocationTracker: Send + Sync + 'static {
    /// Collect the allocation statistics from the thread.
    ///
    /// Takes ownership of the per-group statistics map handed over by the
    /// calling thread.
    fn collect(&self, allocations: ThreadGroupStatisticsMap);
}
102
/// The only implementation of allocation tracker for now.
///
/// Forwards collected statistics to an event queue as
/// [`SpanMemoryUpdateEvent`]s.
pub(crate) struct QueueAllocTracker {
    /// The queue that memory update events are pushed to.
    pub(crate) events: Arc<EventQueue<SpanMemoryUpdateEvent>>,
}
107
108impl AllocationTracker for QueueAllocTracker {
109    fn collect(&self, allocations: ThreadGroupStatisticsMap) {
110        for (group, allocs) in allocations {
111            let span_id = span::Id::from(group);
112
113            self.events.push(SpanMemoryUpdateEvent {
114                span_id,
115                stats: allocs,
116            });
117        }
118    }
119}
120
/// The process-wide tracker that thread-local statistics are flushed to.
///
/// Stays unset until [`set_global_tracker`] is called.
static GLOBAL_TRACKER: OnceLock<QueueAllocTracker> = OnceLock::new();
122
123pub(crate) fn set_global_tracker(tracker: QueueAllocTracker) {
124    if GLOBAL_TRACKER.set(tracker).is_err() {
125        tracing::debug!("global allocation tracker was already set");
126    }
127}
128
/// A thread local allocation statistics.
///
/// Holds the per-group counters recorded on one thread until they are flushed
/// to the global tracker.
pub(crate) struct ThreadAllocationStatistics {
    /// Statistics recorded since the last flush, keyed by allocation group.
    groups: ThreadGroupStatisticsMap,
}
133
134impl ThreadAllocationStatistics {
135    fn flush_global(&mut self) {
136        // SAFETY: We are dropping the thread local allocations, we don't need to
137        // worry about the global tracker.
138        if let Some(tracker) = GLOBAL_TRACKER.get() {
139            tracker.collect(mem::take(&mut self.groups));
140        } else {
141            // We don't have a global tracker, we can just drop the allocations.
142            self.groups.clear();
143        }
144    }
145}
146
impl Drop for ThreadAllocationStatistics {
    /// Flush whatever is still recorded locally so that it is not lost when
    /// the statistics are dropped.
    fn drop(&mut self) {
        self.flush_global();
    }
}
152
thread_local! {
    /// Statistics recorded on this thread since the last flush.
    static THREAD_GROUP_STATISTICS: UnsafeCell<ThreadAllocationStatistics> = const {
        UnsafeCell::new(ThreadAllocationStatistics {
            groups: ThreadGroupStatisticsMap::new(),
        })
    };

    /// The group that allocations on this thread are currently attributed to;
    /// starts out as the untracked zero group.
    static THREAD_CURRENT_GROUP: Cell<AllocationGroup> = const {
        Cell::new(AllocationGroup(0))
    };

    /// Groups that were current when a nested group was entered, innermost
    /// last; holds at most `GROUP_STACK_SIZE` entries.
    static THREAD_GROUP_STACK: UnsafeCell<heapless::Vec<AllocationGroup, GROUP_STACK_SIZE>> = const {
        UnsafeCell::new(heapless::Vec::new())
    }
}
168
169/// Set the current allocation group for the thread.
170///
171/// Internal use only, only exposed for testing purposes.
172#[doc(hidden)]
173pub fn enter_allocation_group(new_group: AllocationGroup) {
174    let current_group = THREAD_CURRENT_GROUP.try_with(Cell::get).unwrap_or_default();
175
176    if current_group.should_track() {
177        // We are already tracking a group, push the current one
178        // to the stack.
179        let success = THREAD_GROUP_STACK
180            .try_with(|stack| {
181                // SAFETY: We access the stack for a short time, we don't alias the pointer.
182                unsafe { &mut *stack.get() }.push(current_group).is_ok()
183            })
184            .unwrap_or_default();
185
186        if !success {
187            no_track(|| {
188                tracing::warn!("maximum allocation group stack size reached");
189            });
190            return;
191        }
192    }
193
194    // Set the new group as the current one.
195    _ = THREAD_CURRENT_GROUP.try_with(|g| g.set(new_group));
196}
197
198/// Reset the current allocation group to the one that was set
199/// previously.
200///
201/// Internal use only, only exposed for testing purposes.
202#[doc(hidden)]
203pub fn exit_allocation_group() {
204    let last_group = THREAD_GROUP_STACK
205        .try_with(|stack| {
206            // SAFETY: We access the stack for a short time, we don't alias the pointer.
207            let stack = unsafe { &mut *stack.get() };
208            stack.pop()
209        })
210        .ok()
211        .flatten()
212        .unwrap_or_default();
213
214    // Set the new group as the current one.
215    _ = THREAD_CURRENT_GROUP.try_with(|g| g.set(last_group));
216}
217
218fn current_allocation_group() -> AllocationGroup {
219    THREAD_CURRENT_GROUP.try_with(Cell::get).unwrap_or_default()
220}
221
222/// Flush the statistics of the current thread so that it can be
223/// collected by the global tracker.
224///
225/// This usually does not need to be called manually, it is done
226/// automatically when the thread exits or a span is exited or closed.
227pub fn flush_thread_statistics() {
228    let allocs = THREAD_GROUP_STATISTICS
229        .try_with(|cell| {
230            // SAFETY: We access the statistics for a short time, we don't alias the pointer.
231            let thread_stats = unsafe { &mut *cell.get() };
232            mem::take(&mut thread_stats.groups)
233        })
234        .unwrap_or_default();
235
236    if allocs.is_empty() {
237        return;
238    }
239
240    if let Some(tracker) = GLOBAL_TRACKER.get() {
241        tracker.collect(allocs);
242    }
243}
244
245fn record_allocation(group: AllocationGroup, layout: Layout) {
246    _ = THREAD_GROUP_STATISTICS.try_with(|cell| {
247        // SAFETY: We access the statistics for a short time, we don't alias the pointer.
248        let thread_stats = unsafe { &mut *cell.get() };
249        if let Some(stats) = thread_stats.groups.get_mut(&group) {
250            stats.allocation_count += 1;
251            stats.allocated_bytes += layout.size() as u64;
252        } else {
253            let mut stats = GroupAllocationStatistics::default();
254            stats.allocation_count += 1;
255            stats.allocated_bytes = layout.size() as u64;
256            if let Err((k, v)) = thread_stats.groups.insert(group, stats) {
257                thread_stats.flush_global();
258
259                // Flush guarantees that the group is empty.
260                thread_stats.groups.insert(k, v).unwrap();
261            }
262        }
263    });
264}
265
266fn record_deallocation(group: AllocationGroup, layout: Layout) {
267    _ = THREAD_GROUP_STATISTICS.try_with(|cell| {
268        // SAFETY: We access the statistics for a short time, we don't alias the pointer.
269        let thread_stats = unsafe { &mut *cell.get() };
270        if let Some(stats) = thread_stats.groups.get_mut(&group) {
271            stats.deallocation_count += 1;
272            stats.deallocated_bytes += layout.size() as u64;
273        } else {
274            let mut stats = GroupAllocationStatistics::default();
275            stats.deallocation_count += 1;
276            stats.deallocated_bytes = layout.size() as u64;
277            if let Err((k, v)) = thread_stats.groups.insert(group, stats) {
278                thread_stats.flush_global();
279
280                // Flush guarantees that the group is empty.
281                thread_stats.groups.insert(k, v).unwrap();
282            }
283        }
284    });
285}
286
287/// Do not track any allocations in the given function on this thread.
288///
289/// The given function should be as small as possible and must not
290/// alter the allocation group stack or the current allocation group.
291pub(crate) fn no_track<F, T>(f: F) -> T
292where
293    F: FnOnce() -> T,
294{
295    let prev_group = THREAD_CURRENT_GROUP
296        .try_with(Cell::take)
297        .unwrap_or_default();
298    let result = f();
299    _ = THREAD_CURRENT_GROUP.try_with(|c| c.set(prev_group));
300    result
301}
302
/// An allocator that tracks allocations and deallocations.
///
/// The allocator works by tagging each allocation with a group ID in
/// the first 8 bytes of the block requested from the inner allocator; the
/// pointer handed to the caller points past that header.
///
/// Allocations are first tracked for each thread locally then periodically
/// reported to the global tracker via [`flush_thread_statistics`].
///
/// The allocations and deallocations are only reported if a group ID is set,
/// however the tagging is always done, so a tiny memory and
/// bookkeeping overhead is incurred.
#[must_use]
pub struct TrackingAllocator<A = std::alloc::System> {
    /// The allocator that actually provides the memory.
    inner: A,
}
318
319impl<A> TrackingAllocator<A> {
320    /// Create a new tracking allocator.
321    pub const fn new(inner: A) -> Self {
322        TrackingAllocator { inner }
323    }
324}
325
326impl TrackingAllocator<std::alloc::System> {
327    /// Create a new tracking allocator that uses the system allocator.
328    pub const fn system() -> Self {
329        TrackingAllocator::new(std::alloc::System)
330    }
331}
332
333// SAFETY: This mostly wraps the inner allocator, pointer manipulations are
334// carefully done and documented.
335unsafe impl<A: GlobalAlloc> GlobalAlloc for TrackingAllocator<A> {
336    unsafe fn alloc(&self, object_layout: Layout) -> *mut u8 {
337        // Allocate our wrapped layout and make sure the allocation succeeded.
338        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
339        // SAFETY: We know that `actual_layout` is valid and aligned for the allocation.
340        let wrapped_ptr = unsafe { self.inner.alloc(wrapped_layout) };
341        if wrapped_ptr.is_null() {
342            return std::ptr::null_mut();
343        }
344
345        // SAFETY: We know that the first field of the allocation is a `u64` and that
346        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
347        #[allow(clippy::cast_ptr_alignment)]
348        let group_id_ptr = wrapped_ptr.cast::<u64>();
349
350        let group = current_allocation_group();
351
352        if group.should_track() {
353            record_allocation(group, object_layout);
354        }
355
356        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
357        unsafe {
358            group_id_ptr.write(group.0);
359        }
360
361        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
362        // `offset_to_object` as it would land within the allocation.
363        wrapped_ptr.wrapping_add(offset_to_object)
364    }
365
366    unsafe fn dealloc(&self, ptr: *mut u8, object_layout: Layout) {
367        // SAFETY: We did this in the allocation path unconditionally, so we can always
368        // assume that all allocations were wrapped with our header.
369        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
370
371        // SAFETY: The pointer to deallocate references the object layout as we returned it
372        // from the allocation path. Since we know that the layout was extended with our
373        // header, we can safely subtract the offset to the object from the pointer to get the
374        // pointer to the header.
375        let wrapped_ptr = ptr.wrapping_sub(offset_to_object);
376
377        // SAFETY: We know that the first field of the allocation is a `u64` and that
378        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
379        #[allow(clippy::cast_ptr_alignment)]
380        let group_id_ptr = wrapped_ptr.cast::<u64>();
381
382        // SAFETY: We know that `group_id_ptr` is valid and aligned for reading a `u64` value.
383        let group = AllocationGroup(unsafe { group_id_ptr.read() });
384
385        // SAFETY: We know that `wrapped_ptr` is valid and aligned for the allocation.
386        unsafe {
387            self.inner.dealloc(wrapped_ptr, wrapped_layout);
388        }
389
390        if group.should_track() {
391            record_deallocation(group, object_layout);
392        }
393    }
394
395    unsafe fn alloc_zeroed(&self, object_layout: Layout) -> *mut u8 {
396        // Allocate our wrapped layout and make sure the allocation succeeded.
397        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
398        // SAFETY: We know that `actual_layout` is valid and aligned for the allocation.
399        let wrapped_ptr = unsafe { self.inner.alloc_zeroed(wrapped_layout) };
400        if wrapped_ptr.is_null() {
401            return std::ptr::null_mut();
402        }
403
404        // SAFETY: We know that the first field of the allocation is a `u64` and that
405        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
406        #[allow(clippy::cast_ptr_alignment)]
407        let group_id_ptr = wrapped_ptr.cast::<u64>();
408
409        let group = current_allocation_group();
410
411        if group.should_track() {
412            record_allocation(group, object_layout);
413        }
414
415        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
416        unsafe {
417            group_id_ptr.write(group.0);
418        }
419
420        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
421        // `offset_to_object` as it would land within the allocation.
422        wrapped_ptr.wrapping_add(offset_to_object)
423    }
424
425    unsafe fn realloc(&self, ptr: *mut u8, object_layout: Layout, new_size: usize) -> *mut u8 {
426        // SAFETY: We did this in the allocation path unconditionally, so we can always
427        // assume that all allocations were wrapped with our header.
428        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
429
430        // Make sure to always reserve space for the header.
431        let new_size = new_size + mem::size_of::<u64>();
432
433        // SAFETY: The pointer to deallocate references the object layout as we returned it
434        // from the allocation path. Since we know that the layout was extended with our
435        // header, we can safely subtract the offset to the object from the pointer to get the
436        // pointer to the header.
437        let wrapped_ptr = ptr.wrapping_sub(offset_to_object);
438
439        // SAFETY: We know that the first field of the allocation is a `u64` and that
440        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
441        #[allow(clippy::cast_ptr_alignment)]
442        let group_id_ptr = wrapped_ptr.cast::<u64>();
443
444        // SAFETY: We know that `group_id_ptr` is valid and aligned for reading a `u64` value.
445        let group = AllocationGroup(unsafe { group_id_ptr.read() });
446
447        // We treat reallocation as a deallocation followed by an allocation.
448        if group.should_track() {
449            record_deallocation(group, object_layout);
450
451            let new_layout = Layout::from_size_align(new_size, object_layout.align())
452                .expect("reallocation requested layout resulted in overflow");
453
454            record_allocation(group, new_layout);
455        }
456
457        // SAFETY: We know that `wrapped_ptr` is valid and aligned for the allocation.
458        let wrapped_ptr = unsafe { self.inner.realloc(wrapped_ptr, wrapped_layout, new_size) };
459
460        if wrapped_ptr.is_null() {
461            return std::ptr::null_mut();
462        }
463
464        // SAFETY: We know that the first field of the allocation is a `u64` and that
465        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
466        #[allow(clippy::cast_ptr_alignment)]
467        let group_id_ptr = wrapped_ptr.cast::<u64>();
468
469        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
470        // We need to write the group id again, since the pointer may have changed.
471        unsafe {
472            group_id_ptr.write(group.0);
473        }
474
475        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
476        // `offset_to_object` as it would land within the allocation.
477        wrapped_ptr.wrapping_add(offset_to_object)
478    }
479}
480
/// Computes the layout of the block that is requested from the inner
/// allocator for an object of the given `layout`.
///
/// Returns the combined layout (a `u64` header followed by the object, padded
/// to the combined alignment) and the offset of the object from the start of
/// the block.
///
/// # Panics
///
/// Panics if adding the header overflows the layout size.
fn get_wrapped_layout(layout: Layout) -> (Layout, usize) {
    // The header that precedes every object.
    let header = Layout::new::<u64>();

    let (combined, offset_to_object) = header
        .extend(layout)
        .expect("wrapping requested layout resulted in overflow");

    (combined.pad_to_align(), offset_to_object)
}