// tracing-prof 0.3.0

//! An allocator that enables tracking of allocations and deallocations
//! for allocation groups (e.g. spans).

use std::{
    alloc::{GlobalAlloc, Layout},
    cell::{Cell, UnsafeCell},
    mem,
    sync::{Arc, OnceLock},
};

use heapless::FnvIndexMap;
use tracing_core::span;

use crate::events::{EventQueue, SpanMemoryUpdateEvent};

/// The maximum number of allocation groups that a thread keeps statistics for
/// before they are flushed to the global tracker.
///
/// This is the fixed capacity of [`ThreadGroupStatisticsMap`]; when an insert
/// does not fit, the recording functions flush the whole map and retry.
/// Since we flush fairly often, we can afford to have a small number of groups
/// tracked at a time.
// NOTE(review): heapless index maps are documented to need a power-of-two
// capacity — confirm against the heapless docs before changing this value.
const MAX_GROUPS: usize = 4;

/// The maximum depth of the allocation group stack.
///
/// This is the fixed capacity of the per-thread stack of entered groups; once
/// it is full, `enter_allocation_group` logs a warning and leaves the current
/// group unchanged.
const GROUP_STACK_SIZE: usize = 128;

/// An allocation group identifier.
///
/// A thin wrapper around the `u64` value of a span ID. The value `0` (which is
/// also the `Default`) means "no group": allocations made while it is current
/// are tagged but not counted.
///
/// Internal use only, only exposed for testing purposes.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
#[doc(hidden)]
pub struct AllocationGroup(u64);

impl From<&span::Id> for AllocationGroup {
    #[inline]
    fn from(id: &span::Id) -> Self {
        AllocationGroup(id.into_u64())
    }
}

impl From<span::Id> for AllocationGroup {
    #[inline]
    fn from(id: span::Id) -> Self {
        AllocationGroup(id.into_u64())
    }
}

impl From<AllocationGroup> for span::Id {
    #[inline]
    fn from(group: AllocationGroup) -> Self {
        span::Id::from_u64(group.0)
    }
}

impl AllocationGroup {
    #[inline]
    #[must_use]
    const fn should_track(self) -> bool {
        self.0 != 0
    }
}

/// Fixed-capacity map from an allocation group to the statistics accumulated
/// for it on one thread; it holds at most `MAX_GROUPS` entries.
pub(crate) type ThreadGroupStatisticsMap =
    FnvIndexMap<AllocationGroup, GroupAllocationStatistics, MAX_GROUPS>;

/// Allocation statistics for an allocation group (e.g. a span).
///
/// Allocations and deallocations are counted independently of each other, so
/// a value may well show allocations without any deallocations. The original
/// authors consider the reverse (deallocations without allocations) a bug.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct GroupAllocationStatistics {
    /// How many allocations were recorded for this group.
    pub(crate) allocation_count: u64,
    /// How many deallocations were recorded for this group.
    pub(crate) deallocation_count: u64,
    /// Total number of bytes allocated for this group.
    pub(crate) allocated_bytes: u64,
    /// Total number of bytes deallocated for this group.
    pub(crate) deallocated_bytes: u64,
}

impl GroupAllocationStatistics {
    /// The number of allocations of this group that have not been
    /// deallocated yet.
    ///
    /// Saturates at zero when more deallocations than allocations were
    /// recorded.
    #[must_use]
    pub(crate) fn in_use_count(&self) -> u64 {
        let Self {
            allocation_count,
            deallocation_count,
            ..
        } = *self;

        allocation_count.saturating_sub(deallocation_count)
    }

    /// The number of bytes of this group that have not been deallocated yet.
    ///
    /// Saturates at zero when more bytes were deallocated than allocated.
    #[must_use]
    pub(crate) fn in_use_bytes(&self) -> u64 {
        let Self {
            allocated_bytes,
            deallocated_bytes,
            ..
        } = *self;

        allocated_bytes.saturating_sub(deallocated_bytes)
    }
}

/// A tracker that collects allocation statistics from threads.
///
/// Implementations must be `Send + Sync + 'static`.
pub(crate) trait AllocationTracker: Send + Sync + 'static {
    /// Collect the allocation statistics from the thread.
    ///
    /// `allocations` is passed by value: it holds the per-group statistics a
    /// thread accumulated since its previous flush.
    fn collect(&self, allocations: ThreadGroupStatisticsMap);
}

/// The only implementation of allocation tracker for now.
///
/// It forwards every collected group as a [`SpanMemoryUpdateEvent`] to a
/// shared event queue.
pub(crate) struct QueueAllocTracker {
    /// The queue that receives one event per collected allocation group.
    pub(crate) events: Arc<EventQueue<SpanMemoryUpdateEvent>>,
}

impl AllocationTracker for QueueAllocTracker {
    /// Pushes one [`SpanMemoryUpdateEvent`] per group onto the event queue.
    ///
    /// Every group is converted back to a span ID, so the map must only
    /// contain tracked (non-zero) groups.
    fn collect(&self, allocations: ThreadGroupStatisticsMap) {
        allocations
            .into_iter()
            .map(|(group, stats)| SpanMemoryUpdateEvent {
                span_id: span::Id::from(group),
                stats,
            })
            .for_each(|event| {
                self.events.push(event);
            });
    }
}

/// The process-wide tracker that receives flushed thread statistics.
///
/// It can be set at most once, via `set_global_tracker`; while it is unset,
/// flushed statistics are discarded.
static GLOBAL_TRACKER: OnceLock<QueueAllocTracker> = OnceLock::new();

/// Installs `tracker` as the global allocation tracker.
///
/// Only the first call has an effect. Later calls leave the installed tracker
/// in place, drop the given one and emit a debug log message.
pub(crate) fn set_global_tracker(tracker: QueueAllocTracker) {
    match GLOBAL_TRACKER.set(tracker) {
        Ok(()) => {}
        Err(_rejected) => {
            tracing::debug!("global allocation tracker was already set");
        }
    }
}

/// The allocation statistics of a single thread.
///
/// The statistics are handed to the global tracker when the value is dropped
/// (see the `Drop` implementation) or when the map runs out of capacity.
pub(crate) struct ThreadAllocationStatistics {
    /// Statistics per allocation group, accumulated since the last flush.
    groups: ThreadGroupStatisticsMap,
}

impl ThreadAllocationStatistics {
    /// Hands all accumulated statistics to the global tracker and leaves the
    /// map empty.
    ///
    /// Without a global tracker the statistics are simply discarded.
    // NOTE(review): callers inside the allocator hold a `&mut` to this value
    // while the tracker runs; if the tracker allocates while a tracked group is
    // current, the recording functions would be re-entered — confirm that
    // `EventQueue::push` cannot do that.
    fn flush_global(&mut self) {
        // Taking the map empties it regardless of whether anybody consumes it.
        let pending = mem::take(&mut self.groups);

        if let Some(tracker) = GLOBAL_TRACKER.get() {
            tracker.collect(pending);
        }
    }
}

/// Flushes whatever statistics are still pending when the value goes away,
/// e.g. when the owning thread-local is destroyed.
impl Drop for ThreadAllocationStatistics {
    fn drop(&mut self) {
        Self::flush_global(self);
    }
}

thread_local! {
    // Per-thread statistics, keyed by allocation group. Kept in an `UnsafeCell`
    // and accessed through short-lived `&mut` borrows by the recording and
    // flushing functions.
    static THREAD_GROUP_STATISTICS: UnsafeCell<ThreadAllocationStatistics> = const {
        UnsafeCell::new(ThreadAllocationStatistics {
            groups: ThreadGroupStatisticsMap::new(),
        })
    };

    // The group that new allocations on this thread are attributed to;
    // zero means that allocations are not counted.
    static THREAD_CURRENT_GROUP: Cell<AllocationGroup> = const {
        Cell::new(AllocationGroup(0))
    };

    // The groups that were current when a nested group was entered, innermost
    // last. Holds at most `GROUP_STACK_SIZE` entries.
    static THREAD_GROUP_STACK: UnsafeCell<heapless::Vec<AllocationGroup, GROUP_STACK_SIZE>> = const {
        UnsafeCell::new(heapless::Vec::new())
    }
}

/// Make `new_group` the current allocation group of this thread.
///
/// If a tracked group is already current, it is saved on the thread's group
/// stack first so that [`exit_allocation_group`] can restore it. When it
/// cannot be saved (the stack is full or no longer accessible), a warning is
/// logged and the current group is left unchanged.
///
/// Internal use only, only exposed for testing purposes.
// NOTE(review): in the failure case nothing is pushed, yet the matching
// `exit_allocation_group` still pops, so enter/exit go out of step once the
// stack has overflowed — confirm whether that is acceptable.
#[doc(hidden)]
pub fn enter_allocation_group(new_group: AllocationGroup) {
    let active_group = THREAD_CURRENT_GROUP.try_with(Cell::get).unwrap_or_default();

    // Only a tracked group has to be remembered for the matching exit.
    let saved = !active_group.should_track()
        || THREAD_GROUP_STACK
            .try_with(|cell| {
                // SAFETY: The reference only lives inside this closure and nothing
                // in here accesses the stack a second time.
                let stack = unsafe { &mut *cell.get() };
                stack.push(active_group).is_ok()
            })
            .unwrap_or_default();

    if saved {
        // Switch over to the new group.
        _ = THREAD_CURRENT_GROUP.try_with(|current| current.set(new_group));
    } else {
        no_track(|| {
            tracing::warn!("maximum allocation group stack size reached");
        });
    }
}

/// Reset the current allocation group to the one that was set
/// previously.
///
/// Internal use only, only exposed for testing purposes.
#[doc(hidden)]
pub fn exit_allocation_group() {
    let last_group = THREAD_GROUP_STACK
        .try_with(|stack| {
            // SAFETY: We access the stack for a short time, we don't alias the pointer.
            let stack = unsafe { &mut *stack.get() };
            stack.pop()
        })
        .ok()
        .flatten()
        .unwrap_or_default();

    // Set the new group as the current one.
    _ = THREAD_CURRENT_GROUP.try_with(|g| g.set(last_group));
}

fn current_allocation_group() -> AllocationGroup {
    THREAD_CURRENT_GROUP.try_with(Cell::get).unwrap_or_default()
}

/// Flush the statistics of the current thread so that they can be
/// collected by the global tracker.
///
/// The thread's statistics are emptied in any case; they are discarded if no
/// global tracker has been set.
///
/// This usually does not need to be called manually, it is done
/// automatically when the thread exits or a span is exited or closed.
pub fn flush_thread_statistics() {
    let taken = THREAD_GROUP_STATISTICS.try_with(|cell| {
        // SAFETY: The reference only lives inside this closure; the tracker is
        // called after the borrow has ended.
        let thread_stats = unsafe { &mut *cell.get() };
        mem::take(&mut thread_stats.groups)
    });

    // Nothing to report if the thread-local is gone or nothing was recorded.
    let pending = match taken {
        Ok(groups) if !groups.is_empty() => groups,
        _ => return,
    };

    if let Some(tracker) = GLOBAL_TRACKER.get() {
        tracker.collect(pending);
    }
}

fn record_allocation(group: AllocationGroup, layout: Layout) {
    _ = THREAD_GROUP_STATISTICS.try_with(|cell| {
        // SAFETY: We access the statistics for a short time, we don't alias the pointer.
        let thread_stats = unsafe { &mut *cell.get() };
        if let Some(stats) = thread_stats.groups.get_mut(&group) {
            stats.allocation_count += 1;
            stats.allocated_bytes += layout.size() as u64;
        } else {
            let mut stats = GroupAllocationStatistics::default();
            stats.allocation_count += 1;
            stats.allocated_bytes = layout.size() as u64;
            if let Err((k, v)) = thread_stats.groups.insert(group, stats) {
                thread_stats.flush_global();

                // Flush guarantees that the group is empty.
                thread_stats.groups.insert(k, v).unwrap();
            }
        }
    });
}

fn record_deallocation(group: AllocationGroup, layout: Layout) {
    _ = THREAD_GROUP_STATISTICS.try_with(|cell| {
        // SAFETY: We access the statistics for a short time, we don't alias the pointer.
        let thread_stats = unsafe { &mut *cell.get() };
        if let Some(stats) = thread_stats.groups.get_mut(&group) {
            stats.deallocation_count += 1;
            stats.deallocated_bytes += layout.size() as u64;
        } else {
            let mut stats = GroupAllocationStatistics::default();
            stats.deallocation_count += 1;
            stats.deallocated_bytes = layout.size() as u64;
            if let Err((k, v)) = thread_stats.groups.insert(group, stats) {
                thread_stats.flush_global();

                // Flush guarantees that the group is empty.
                thread_stats.groups.insert(k, v).unwrap();
            }
        }
    });
}

/// Do not track any allocations in the given function on this thread.
///
/// The given function should be as small as possible and must not
/// alter the allocation group stack or the current allocation group.
pub(crate) fn no_track<F, T>(f: F) -> T
where
    F: FnOnce() -> T,
{
    let prev_group = THREAD_CURRENT_GROUP
        .try_with(Cell::take)
        .unwrap_or_default();
    let result = f();
    _ = THREAD_CURRENT_GROUP.try_with(|c| c.set(prev_group));
    result
}

/// An allocator that tracks allocations and deallocations.
///
/// Every allocation is tagged with the ID of the allocation group that was
/// current when it was made; the ID is stored in the first 8 bytes of the
/// underlying allocation, in front of the memory handed to the caller.
///
/// Statistics are first accumulated per thread and then periodically
/// reported to the global tracker via [`flush_thread_statistics`].
///
/// Allocations and deallocations are only counted while a group ID is set.
/// The tagging itself always happens, so a small memory and bookkeeping
/// overhead is paid for every allocation.
#[must_use]
pub struct TrackingAllocator<A = std::alloc::System> {
    inner: A,
}

impl<A> TrackingAllocator<A> {
    /// Wrap `inner` in a tracking allocator.
    pub const fn new(inner: A) -> Self {
        Self { inner }
    }
}

impl TrackingAllocator<std::alloc::System> {
    /// Create a tracking allocator on top of the system allocator.
    pub const fn system() -> Self {
        Self {
            inner: std::alloc::System,
        }
    }
}

// SAFETY: This mostly wraps the inner allocator, pointer manipulations are
// carefully done and documented.
unsafe impl<A: GlobalAlloc> GlobalAlloc for TrackingAllocator<A> {
    unsafe fn alloc(&self, object_layout: Layout) -> *mut u8 {
        // Allocate our wrapped layout and make sure the allocation succeeded.
        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
        // SAFETY: We know that `actual_layout` is valid and aligned for the allocation.
        let wrapped_ptr = unsafe { self.inner.alloc(wrapped_layout) };
        if wrapped_ptr.is_null() {
            return std::ptr::null_mut();
        }

        // SAFETY: We know that the first field of the allocation is a `u64` and that
        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
        #[allow(clippy::cast_ptr_alignment)]
        let group_id_ptr = wrapped_ptr.cast::<u64>();

        let group = current_allocation_group();

        if group.should_track() {
            record_allocation(group, object_layout);
        }

        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
        unsafe {
            group_id_ptr.write(group.0);
        }

        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
        // `offset_to_object` as it would land within the allocation.
        wrapped_ptr.wrapping_add(offset_to_object)
    }

    unsafe fn dealloc(&self, ptr: *mut u8, object_layout: Layout) {
        // SAFETY: We did this in the allocation path unconditionally, so we can always
        // assume that all allocations were wrapped with our header.
        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);

        // SAFETY: The pointer to deallocate references the object layout as we returned it
        // from the allocation path. Since we know that the layout was extended with our
        // header, we can safely subtract the offset to the object from the pointer to get the
        // pointer to the header.
        let wrapped_ptr = ptr.wrapping_sub(offset_to_object);

        // SAFETY: We know that the first field of the allocation is a `u64` and that
        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
        #[allow(clippy::cast_ptr_alignment)]
        let group_id_ptr = wrapped_ptr.cast::<u64>();

        // SAFETY: We know that `group_id_ptr` is valid and aligned for reading a `u64` value.
        let group = AllocationGroup(unsafe { group_id_ptr.read() });

        // SAFETY: We know that `wrapped_ptr` is valid and aligned for the allocation.
        unsafe {
            self.inner.dealloc(wrapped_ptr, wrapped_layout);
        }

        if group.should_track() {
            record_deallocation(group, object_layout);
        }
    }

    unsafe fn alloc_zeroed(&self, object_layout: Layout) -> *mut u8 {
        // Allocate our wrapped layout and make sure the allocation succeeded.
        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);
        // SAFETY: We know that `actual_layout` is valid and aligned for the allocation.
        let wrapped_ptr = unsafe { self.inner.alloc_zeroed(wrapped_layout) };
        if wrapped_ptr.is_null() {
            return std::ptr::null_mut();
        }

        // SAFETY: We know that the first field of the allocation is a `u64` and that
        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
        #[allow(clippy::cast_ptr_alignment)]
        let group_id_ptr = wrapped_ptr.cast::<u64>();

        let group = current_allocation_group();

        if group.should_track() {
            record_allocation(group, object_layout);
        }

        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
        unsafe {
            group_id_ptr.write(group.0);
        }

        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
        // `offset_to_object` as it would land within the allocation.
        wrapped_ptr.wrapping_add(offset_to_object)
    }

    unsafe fn realloc(&self, ptr: *mut u8, object_layout: Layout, new_size: usize) -> *mut u8 {
        // SAFETY: We did this in the allocation path unconditionally, so we can always
        // assume that all allocations were wrapped with our header.
        let (wrapped_layout, offset_to_object) = get_wrapped_layout(object_layout);

        // Make sure to always reserve space for the header.
        let new_size = new_size + mem::size_of::<u64>();

        // SAFETY: The pointer to deallocate references the object layout as we returned it
        // from the allocation path. Since we know that the layout was extended with our
        // header, we can safely subtract the offset to the object from the pointer to get the
        // pointer to the header.
        let wrapped_ptr = ptr.wrapping_sub(offset_to_object);

        // SAFETY: We know that the first field of the allocation is a `u64` and that
        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
        #[allow(clippy::cast_ptr_alignment)]
        let group_id_ptr = wrapped_ptr.cast::<u64>();

        // SAFETY: We know that `group_id_ptr` is valid and aligned for reading a `u64` value.
        let group = AllocationGroup(unsafe { group_id_ptr.read() });

        // We treat reallocation as a deallocation followed by an allocation.
        if group.should_track() {
            record_deallocation(group, object_layout);

            let new_layout = Layout::from_size_align(new_size, object_layout.align())
                .expect("reallocation requested layout resulted in overflow");

            record_allocation(group, new_layout);
        }

        // SAFETY: We know that `wrapped_ptr` is valid and aligned for the allocation.
        let wrapped_ptr = unsafe { self.inner.realloc(wrapped_ptr, wrapped_layout, new_size) };

        if wrapped_ptr.is_null() {
            return std::ptr::null_mut();
        }

        // SAFETY: We know that the first field of the allocation is a `u64` and that
        // `wrapped_ptr` is valid and aligned for writing a `u64` value.
        #[allow(clippy::cast_ptr_alignment)]
        let group_id_ptr = wrapped_ptr.cast::<u64>();

        // SAFETY: We know that `group_id_ptr` is valid and aligned for writing a `u64` value.
        // We need to write the group id again, since the pointer may have changed.
        unsafe {
            group_id_ptr.write(group.0);
        }

        // SAFETY: If the allocation succeeded and `actual_ptr` is valid, then it must be valid to advance by
        // `offset_to_object` as it would land within the allocation.
        wrapped_ptr.wrapping_add(offset_to_object)
    }
}

/// Computes the layout of an allocation that holds a `u64` header followed by
/// an object of the given `layout`.
///
/// Returns the combined layout, padded to its own alignment, together with the
/// offset of the object from the start of the allocation.
///
/// # Panics
///
/// Panics if the combined layout overflows.
fn get_wrapped_layout(layout: Layout) -> (Layout, usize) {
    let header = Layout::new::<u64>();

    // Place the object behind the header, respecting the object's alignment.
    let (combined, offset_to_object) = header
        .extend(layout)
        .expect("wrapping requested layout resulted in overflow");

    (combined.pad_to_align(), offset_to_object)
}