// rsprof-trace crate root: rsprof_trace/lib.rs

//! Self-instrumentation library for rsprof.
//!
//! This crate provides CPU and heap profiling through self-instrumentation:
//! - **CPU profiling**: Timer-based sampling using SIGPROF
//! - **Heap profiling**: Custom allocator that tracks allocations
//!
//! # Usage
//!
//! Add to your `Cargo.toml`:
//! ```toml
//! [dependencies]
//! rsprof-trace = { version = "0.1", features = ["profiling"] }
//! ```
//!
//! Enable profiling with the `profiler!` macro:
//! ```rust,ignore
//! rsprof_trace::profiler!();  // CPU at 99Hz + heap profiling
//! ```
//!
//! Or customize the CPU sampling frequency:
//! ```rust,ignore
//! rsprof_trace::profiler!(cpu = 199);  // CPU at 199Hz + heap profiling
//! ```
//!
//! Build with frame pointers for accurate stack traces:
//! ```bash
//! RUSTFLAGS="-C force-frame-pointers=yes" cargo build --release --features profiling
//! ```
//!
//! When the `profiling` feature is disabled, the macro expands to a no-op
//! allocator passthrough with zero overhead.

#![no_std]

extern crate alloc;

// Include the profiling implementation only when at least one profiling
// feature is enabled; otherwise this crate compiles to a pure passthrough.
#[cfg(any(feature = "heap", feature = "cpu"))]
mod profiling;

// Re-export the CPU profiling control functions when the `cpu` feature is on.
#[cfg(feature = "cpu")]
pub use profiling::{start_cpu_profiling, stop_cpu_profiling};

// Stubs when CPU feature is disabled: same signatures, zero overhead.

/// No-op stub for `start_cpu_profiling` used when the `cpu` feature is
/// disabled. Accepts and ignores the requested sampling frequency so callers
/// compile unchanged in both configurations.
#[cfg(not(feature = "cpu"))]
#[inline]
pub fn start_cpu_profiling(_freq_hz: u32) {}

/// No-op stub for `stop_cpu_profiling` used when the `cpu` feature is
/// disabled.
#[cfg(not(feature = "cpu"))]
#[inline]
pub fn stop_cpu_profiling() {}

/// A profiling allocator that wraps the system allocator.
///
/// The const generic `CPU_FREQ` specifies the CPU sampling frequency in Hz.
/// Set to 0 to disable CPU profiling.
///
/// When the `heap` feature is enabled, this allocator captures
/// allocation and deallocation events along with stack traces.
/// CPU profiling (if enabled) starts automatically on the first allocation.
///
/// When profiling features are disabled, it's a zero-cost passthrough.
///
/// This is a zero-sized unit struct; all behavior lives in the cfg-gated
/// `GlobalAlloc` implementations below.
pub struct ProfilingAllocator<const CPU_FREQ: u32 = 99>;

66impl<const CPU_FREQ: u32> ProfilingAllocator<CPU_FREQ> {
67    pub const fn new() -> Self {
68        Self
69    }
70}
71
72impl<const CPU_FREQ: u32> Default for ProfilingAllocator<CPU_FREQ> {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
/// Legacy alias kept for backwards compatibility: a profiling allocator with
/// the default 99 Hz CPU sampling frequency.
pub type HeapProfiler = ProfilingAllocator<99>;

81#[cfg(not(feature = "heap"))]
82mod disabled {
83    use super::ProfilingAllocator;
84    use core::alloc::{GlobalAlloc, Layout};
85
86    unsafe impl<const CPU_FREQ: u32> GlobalAlloc for ProfilingAllocator<CPU_FREQ> {
87        #[inline]
88        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
89            unsafe { libc::malloc(layout.size()) as *mut u8 }
90        }
91
92        #[inline]
93        unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) {
94            unsafe { libc::free(ptr as *mut libc::c_void) }
95        }
96
97        #[inline]
98        unsafe fn realloc(&self, ptr: *mut u8, _layout: Layout, new_size: usize) -> *mut u8 {
99            unsafe { libc::realloc(ptr as *mut libc::c_void, new_size) as *mut u8 }
100        }
101
102        #[inline]
103        unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
104            unsafe { libc::calloc(1, layout.size()) as *mut u8 }
105        }
106    }
107}
108
// Profiling allocator implementation, compiled in with the `heap` feature.
#[cfg(feature = "heap")]
mod enabled {
    use super::ProfilingAllocator;
    #[cfg(feature = "cpu")]
    use super::profiling::start_cpu_profiling;
    use super::profiling::{record_alloc, record_dealloc};
    use core::alloc::{GlobalAlloc, Layout};
    #[cfg(feature = "cpu")]
    use core::sync::atomic::{AtomicBool, Ordering};

    /// Latch so the CPU profiler starts at most once (on first allocation).
    /// Gated on `cpu` so the static and atomic imports don't produce unused
    /// warnings when CPU profiling is compiled out.
    #[cfg(feature = "cpu")]
    static CPU_INITIALIZED: AtomicBool = AtomicBool::new(false);

    /// Starts CPU profiling on the first call when `FREQ > 0`.
    /// A no-op when the `cpu` feature is disabled or `FREQ == 0`.
    #[inline]
    fn maybe_init_cpu<const FREQ: u32>() {
        #[cfg(feature = "cpu")]
        {
            // `swap` returns the previous value, so exactly one caller
            // wins the race and starts the profiler.
            if FREQ > 0 && !CPU_INITIALIZED.swap(true, Ordering::SeqCst) {
                start_cpu_profiling(FREQ);
            }
        }
    }

    // Minimum alignment guaranteed by malloc (typically 8 on 32-bit, 16 on 64-bit)
    const MIN_ALIGN: usize = core::mem::size_of::<usize>() * 2;

    /// Allocate with proper alignment using posix_memalign when needed.
    ///
    /// Returns null on failure, like `malloc`.
    #[inline(never)]
    unsafe fn aligned_malloc(size: usize, align: usize) -> *mut u8 {
        if align <= MIN_ALIGN {
            // malloc provides sufficient alignment
            unsafe { libc::malloc(size) as *mut u8 }
        } else {
            // Need explicit alignment; posix_memalign returns 0 on success.
            let mut ptr: *mut libc::c_void = core::ptr::null_mut();
            let ret = unsafe { libc::posix_memalign(&mut ptr, align, size) };
            if ret == 0 {
                ptr as *mut u8
            } else {
                core::ptr::null_mut()
            }
        }
    }

    unsafe impl<const CPU_FREQ: u32> GlobalAlloc for ProfilingAllocator<CPU_FREQ> {
        // IMPORTANT: These must NOT be inlined!
        // If inlined into libstd (which has no frame pointers), stack capture breaks.
        #[inline(never)]
        unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
            maybe_init_cpu::<CPU_FREQ>();
            let ptr = unsafe { aligned_malloc(layout.size(), layout.align()) };
            if !ptr.is_null() {
                // Only record allocations that actually succeeded.
                record_alloc(ptr, layout.size());
            }
            ptr
        }

        #[inline(never)]
        unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
            record_dealloc(ptr, layout.size());
            unsafe { libc::free(ptr as *mut libc::c_void) }
        }

        #[inline(never)]
        unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
            // realloc doesn't preserve alignment, so we need to alloc+copy+free
            // for over-aligned types
            if layout.align() > MIN_ALIGN {
                let new_ptr = unsafe { aligned_malloc(new_size, layout.align()) };
                if !new_ptr.is_null() {
                    let copy_size = core::cmp::min(new_size, layout.size());
                    unsafe { core::ptr::copy_nonoverlapping(ptr, new_ptr, copy_size) };
                    record_dealloc(ptr, layout.size());
                    unsafe { libc::free(ptr as *mut libc::c_void) };
                    record_alloc(new_ptr, new_size);
                }
                new_ptr
            } else {
                let new_ptr =
                    unsafe { libc::realloc(ptr as *mut libc::c_void, new_size) as *mut u8 };
                if !new_ptr.is_null() {
                    // BUGFIX: record only after realloc succeeds. On failure
                    // the old block stays live; recording its dealloc up
                    // front (as before) corrupted the heap accounting.
                    record_dealloc(ptr, layout.size());
                    record_alloc(new_ptr, new_size);
                }
                new_ptr
            }
        }

        #[inline(never)]
        unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
            maybe_init_cpu::<CPU_FREQ>();
            if layout.align() <= MIN_ALIGN {
                let ptr = unsafe { libc::calloc(1, layout.size()) as *mut u8 };
                if !ptr.is_null() {
                    record_alloc(ptr, layout.size());
                }
                ptr
            } else {
                // calloc doesn't support alignment, use aligned_malloc + memset
                let ptr = unsafe { aligned_malloc(layout.size(), layout.align()) };
                if !ptr.is_null() {
                    unsafe { core::ptr::write_bytes(ptr, 0, layout.size()) };
                    record_alloc(ptr, layout.size());
                }
                ptr
            }
        }
    }
}

/// Enable profiling for your application.
///
/// This macro sets up both CPU and heap profiling with sensible defaults.
/// CPU profiling starts automatically on the first allocation.
/// When the `profiling` feature is disabled, it expands to a zero-cost no-op.
///
/// # Examples
///
/// ```rust,ignore
/// // Default: CPU at 99Hz + heap profiling
/// rsprof_trace::profiler!();
///
/// // Custom CPU frequency
/// rsprof_trace::profiler!(cpu = 199);
/// ```
///
/// # Build
///
/// Enable profiling at build time:
/// ```bash
/// RUSTFLAGS="-C force-frame-pointers=yes" cargo build --release --features profiling
/// ```
#[macro_export]
#[cfg(feature = "heap")]
macro_rules! profiler {
    () => {
        $crate::profiler!(cpu = 99);
    };
    (cpu = $freq:expr) => {
        // Braces around `$freq` make it a valid const generic argument even
        // when the caller passes a non-literal expression (e.g. `cpu = HZ`):
        // const arguments that are expressions must be block-enclosed.
        #[global_allocator]
        static __RSPROF_ALLOC: $crate::ProfilingAllocator<{ $freq }> =
            $crate::ProfilingAllocator::<{ $freq }>::new();
    };
}

256/// No-op when heap feature is disabled (CPU-only not supported with this macro)
257#[macro_export]
258#[cfg(not(feature = "heap"))]
259macro_rules! profiler {
260    () => {};
261    (cpu = $freq:expr) => {};
262}