Skip to main content

axonml_profile/
lib.rs

1//! axonml-profile - Profiling Tools for Axonml ML Framework
2//!
3//! Provides comprehensive profiling capabilities for neural network training
4//! and inference, including memory tracking, compute profiling, and bottleneck detection.
5//!
6//! # Key Features
7//! - Memory profiler: Track allocations, peak usage, and memory leaks
8//! - Compute profiler: Measure operation times, FLOPS, and throughput
9//! - Timeline profiler: Record events with timestamps for visualization
10//! - Bottleneck detection: Identify performance bottlenecks automatically
11//!
12//! # Example
13//! ```ignore
14//! use axonml_profile::{Profiler, MemoryProfiler, ComputeProfiler};
15//!
16//! // Create a profiler
17//! let profiler = Profiler::new();
18//!
19//! // Profile a forward pass
20//! profiler.start("forward_pass");
21//! let output = model.forward(&input);
22//! profiler.stop("forward_pass");
23//!
24//! // Print summary
//! profiler.print_summary();
26//! ```
27//!
28//! @version 0.1.0
29//! @author AutomataNexus Development Team
30
31#![warn(missing_docs)]
32#![warn(clippy::all)]
33
34pub mod error;
35pub mod memory;
36pub mod compute;
37pub mod timeline;
38pub mod bottleneck;
39pub mod report;
40
41pub use error::{ProfileError, ProfileResult};
42pub use memory::{MemoryProfiler, MemoryStats, AllocationRecord};
43pub use compute::{ComputeProfiler, OperationStats, ProfiledOp};
44pub use timeline::{TimelineProfiler, Event, EventType};
45pub use bottleneck::{BottleneckAnalyzer, Bottleneck, BottleneckType};
46pub use report::{ProfileReport, ReportFormat};
47
48use std::sync::Arc;
49use parking_lot::RwLock;
50
51// =============================================================================
52// Unified Profiler
53// =============================================================================
54
55/// Unified profiler combining memory, compute, and timeline profiling.
56#[derive(Debug)]
57pub struct Profiler {
58    /// Memory profiler instance
59    pub memory: Arc<RwLock<MemoryProfiler>>,
60    /// Compute profiler instance
61    pub compute: Arc<RwLock<ComputeProfiler>>,
62    /// Timeline profiler instance
63    pub timeline: Arc<RwLock<TimelineProfiler>>,
64    /// Whether profiling is enabled
65    enabled: bool,
66}
67
68impl Default for Profiler {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74impl Profiler {
75    /// Creates a new unified profiler.
76    pub fn new() -> Self {
77        Self {
78            memory: Arc::new(RwLock::new(MemoryProfiler::new())),
79            compute: Arc::new(RwLock::new(ComputeProfiler::new())),
80            timeline: Arc::new(RwLock::new(TimelineProfiler::new())),
81            enabled: true,
82        }
83    }
84
85    /// Enables or disables profiling.
86    pub fn set_enabled(&mut self, enabled: bool) {
87        self.enabled = enabled;
88    }
89
90    /// Returns whether profiling is enabled.
91    pub fn is_enabled(&self) -> bool {
92        self.enabled
93    }
94
95    /// Starts profiling an operation.
96    pub fn start(&self, name: &str) {
97        if self.enabled {
98            self.compute.write().start(name);
99            self.timeline.write().record(name, EventType::Start);
100        }
101    }
102
103    /// Stops profiling an operation.
104    pub fn stop(&self, name: &str) {
105        if self.enabled {
106            self.compute.write().stop(name);
107            self.timeline.write().record(name, EventType::End);
108        }
109    }
110
111    /// Records a memory allocation.
112    pub fn record_alloc(&self, name: &str, bytes: usize) {
113        if self.enabled {
114            self.memory.write().record_alloc(name, bytes);
115        }
116    }
117
118    /// Records a memory deallocation.
119    pub fn record_free(&self, name: &str, bytes: usize) {
120        if self.enabled {
121            self.memory.write().record_free(name, bytes);
122        }
123    }
124
125    /// Gets peak memory usage in bytes.
126    pub fn peak_memory(&self) -> usize {
127        self.memory.read().peak_usage()
128    }
129
130    /// Gets current memory usage in bytes.
131    pub fn current_memory(&self) -> usize {
132        self.memory.read().current_usage()
133    }
134
135    /// Gets the total time spent on an operation.
136    pub fn total_time(&self, name: &str) -> std::time::Duration {
137        self.compute.read().total_time(name)
138    }
139
140    /// Gets the average time for an operation.
141    pub fn avg_time(&self, name: &str) -> std::time::Duration {
142        self.compute.read().avg_time(name)
143    }
144
145    /// Resets all profiling data.
146    pub fn reset(&self) {
147        self.memory.write().reset();
148        self.compute.write().reset();
149        self.timeline.write().reset();
150    }
151
152    /// Generates a summary report.
153    pub fn summary(&self) -> ProfileReport {
154        ProfileReport::generate(self)
155    }
156
157    /// Prints a summary to stdout.
158    pub fn print_summary(&self) {
159        println!("{}", self.summary());
160    }
161
162    /// Analyzes for bottlenecks.
163    pub fn analyze_bottlenecks(&self) -> Vec<Bottleneck> {
164        let analyzer = BottleneckAnalyzer::new();
165        let compute_stats = self.compute.read().all_stats();
166        let memory_stats = self.memory.read().stats();
167        analyzer.analyze(&compute_stats, &memory_stats)
168    }
169}
170
171/// RAII guard for automatic profiling scope.
172pub struct ProfileGuard<'a> {
173    profiler: &'a Profiler,
174    name: String,
175}
176
177impl<'a> ProfileGuard<'a> {
178    /// Creates a new profile guard.
179    pub fn new(profiler: &'a Profiler, name: &str) -> Self {
180        profiler.start(name);
181        Self {
182            profiler,
183            name: name.to_string(),
184        }
185    }
186}
187
188impl<'a> Drop for ProfileGuard<'a> {
189    fn drop(&mut self) {
190        self.profiler.stop(&self.name);
191    }
192}
193
/// Creates a profile guard for automatic scope-based profiling.
///
/// Expands to a `let` statement that binds a [`ProfileGuard`] built from
/// `$profiler` (a `&Profiler`) and `$name` (a `&str`). Because the expansion
/// is a statement in the caller's block, the guard stays alive until the end
/// of that block. A trailing comma after `$name` is accepted.
///
/// # Example
/// ```ignore
/// fn forward(profiler: &Profiler) {
///     profile_scope!(profiler, "forward");
///     // ... everything up to the end of `forward` is inside the scope ...
/// }
/// ```
#[macro_export]
macro_rules! profile_scope {
    ($profiler:expr, $name:expr $(,)?) => {
        let _guard = $crate::ProfileGuard::new($profiler, $name);
    };
}
201
202// =============================================================================
203// Global Profiler
204// =============================================================================
205
206use std::sync::OnceLock;
207
208static GLOBAL_PROFILER: OnceLock<Profiler> = OnceLock::new();
209
210/// Gets the global profiler instance.
211pub fn global_profiler() -> &'static Profiler {
212    GLOBAL_PROFILER.get_or_init(Profiler::new)
213}
214
215/// Starts profiling an operation using the global profiler.
216pub fn start(name: &str) {
217    global_profiler().start(name);
218}
219
220/// Stops profiling an operation using the global profiler.
221pub fn stop(name: &str) {
222    global_profiler().stop(name);
223}
224
225/// Records a memory allocation using the global profiler.
226pub fn record_alloc(name: &str, bytes: usize) {
227    global_profiler().record_alloc(name, bytes);
228}
229
230/// Records a memory deallocation using the global profiler.
231pub fn record_free(name: &str, bytes: usize) {
232    global_profiler().record_free(name, bytes);
233}
234
235// =============================================================================
236// Tests
237// =============================================================================
238
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::sleep;
    use std::time::Duration;

    /// A freshly built profiler reports itself as enabled.
    #[test]
    fn test_profiler_creation() {
        assert!(Profiler::new().is_enabled());
    }

    /// An explicit start/stop pair around a 10 ms sleep accumulates at least 10 ms.
    #[test]
    fn test_profile_operation() {
        let prof = Profiler::new();

        prof.start("test_op");
        sleep(Duration::from_millis(10));
        prof.stop("test_op");

        assert!(prof.total_time("test_op") >= Duration::from_millis(10));
    }

    /// Current usage follows allocations and frees; peak usage keeps the high-water mark.
    #[test]
    fn test_memory_tracking() {
        let prof = Profiler::new();

        prof.record_alloc("tensor_a", 1024);
        prof.record_alloc("tensor_b", 2048);
        assert_eq!(prof.current_memory(), 3072);
        assert_eq!(prof.peak_memory(), 3072);

        prof.record_free("tensor_a", 1024);
        assert_eq!(prof.current_memory(), 2048);
        assert_eq!(prof.peak_memory(), 3072);
    }

    /// A guard dropped at the end of a block records the time spent inside that block.
    #[test]
    fn test_profile_guard() {
        let prof = Profiler::new();

        {
            let _scope = ProfileGuard::new(&prof, "scoped_op");
            sleep(Duration::from_millis(5));
        }

        assert!(prof.total_time("scoped_op") >= Duration::from_millis(5));
    }

    /// After `reset`, both the memory counter and the recorded time are back to zero.
    #[test]
    fn test_reset() {
        let prof = Profiler::new();

        prof.start("test");
        prof.stop("test");
        prof.record_alloc("mem", 1000);
        prof.reset();

        assert_eq!(prof.current_memory(), 0);
        assert_eq!(prof.total_time("test"), Duration::ZERO);
    }
}