//! Profiling utilities for tracking memory usage, operation timing, and
//! execution timelines, with bottleneck analysis and report generation.

#![warn(missing_docs)]
#![warn(clippy::all)]

pub mod error;
pub mod memory;
pub mod compute;
pub mod timeline;
pub mod bottleneck;
pub mod report;

pub use error::{ProfileError, ProfileResult};
pub use memory::{MemoryProfiler, MemoryStats, AllocationRecord};
pub use compute::{ComputeProfiler, OperationStats, ProfiledOp};
pub use timeline::{TimelineProfiler, Event, EventType};
pub use bottleneck::{BottleneckAnalyzer, Bottleneck, BottleneckType};
pub use report::{ProfileReport, ReportFormat};

use std::sync::{Arc, OnceLock};
use parking_lot::RwLock;

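/// Top-level profiler that combines memory, compute, and timeline tracking
/// behind shared, thread-safe handles.
///
/// # Examples
///
/// A minimal sketch of the start/stop flow. The crate name `profiler` used
/// here is an assumption; substitute the real crate name.
///
/// ```ignore
/// use profiler::Profiler;
///
/// let profiler = Profiler::new();
/// profiler.start("forward_pass");
/// // ... run the work being measured ...
/// profiler.stop("forward_pass");
///
/// println!("total: {:?}", profiler.total_time("forward_pass"));
/// profiler.print_summary();
/// ```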
#[derive(Debug)]
pub struct Profiler {
    /// Tracks allocations, frees, and peak/current memory usage.
    pub memory: Arc<RwLock<MemoryProfiler>>,
    /// Tracks per-operation compute timing.
    pub compute: Arc<RwLock<ComputeProfiler>>,
    /// Records start and end events for profiled operations.
    pub timeline: Arc<RwLock<TimelineProfiler>>,
    /// Whether profiling calls record anything.
    enabled: bool,
}

impl Default for Profiler {
    fn default() -> Self {
        Self::new()
    }
}

impl Profiler {
    /// Creates a new profiler with all sub-profilers enabled.
    pub fn new() -> Self {
        Self {
            memory: Arc::new(RwLock::new(MemoryProfiler::new())),
            compute: Arc::new(RwLock::new(ComputeProfiler::new())),
            timeline: Arc::new(RwLock::new(TimelineProfiler::new())),
            enabled: true,
        }
    }

    /// Enables or disables profiling; when disabled, recording calls are no-ops.
    pub fn set_enabled(&mut self, enabled: bool) {
        self.enabled = enabled;
    }

    /// Returns whether profiling is currently enabled.
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Starts timing the named operation and records a start event on the timeline.
    pub fn start(&self, name: &str) {
        if self.enabled {
            self.compute.write().start(name);
            self.timeline.write().record(name, EventType::Start);
        }
    }

    /// Stops timing the named operation and records an end event on the timeline.
    pub fn stop(&self, name: &str) {
        if self.enabled {
            self.compute.write().stop(name);
            self.timeline.write().record(name, EventType::End);
        }
    }

    /// Records an allocation of `bytes` attributed to `name`.
    pub fn record_alloc(&self, name: &str, bytes: usize) {
        if self.enabled {
            self.memory.write().record_alloc(name, bytes);
        }
    }

    /// Records a deallocation of `bytes` attributed to `name`.
    pub fn record_free(&self, name: &str, bytes: usize) {
        if self.enabled {
            self.memory.write().record_free(name, bytes);
        }
    }

    /// Returns the peak tracked memory usage in bytes.
    pub fn peak_memory(&self) -> usize {
        self.memory.read().peak_usage()
    }

    /// Returns the currently tracked memory usage in bytes.
    pub fn current_memory(&self) -> usize {
        self.memory.read().current_usage()
    }

    /// Returns the total time spent in the named operation.
    pub fn total_time(&self, name: &str) -> std::time::Duration {
        self.compute.read().total_time(name)
    }

    /// Returns the average time per invocation of the named operation.
    pub fn avg_time(&self, name: &str) -> std::time::Duration {
        self.compute.read().avg_time(name)
    }

    /// Clears all recorded memory, compute, and timeline data.
    pub fn reset(&self) {
        self.memory.write().reset();
        self.compute.write().reset();
        self.timeline.write().reset();
    }

    /// Generates a report summarizing everything recorded so far.
    pub fn summary(&self) -> ProfileReport {
        ProfileReport::generate(self)
    }

    /// Prints the summary report to stdout.
    pub fn print_summary(&self) {
        println!("{}", self.summary());
    }

    /// Analyzes recorded compute and memory statistics and returns detected bottlenecks.
    pub fn analyze_bottlenecks(&self) -> Vec<Bottleneck> {
        let analyzer = BottleneckAnalyzer::new();
        let compute_stats = self.compute.read().all_stats();
        let memory_stats = self.memory.read().stats();
        analyzer.analyze(&compute_stats, &memory_stats)
    }
}

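/// RAII guard that starts profiling a named operation when constructed and
/// stops it when dropped, so the scope is timed even on early return.
///
/// # Examples
///
/// A sketch of scope-based timing (crate name `profiler` assumed):
///
/// ```ignore
/// use profiler::{Profiler, ProfileGuard};
///
/// let profiler = Profiler::new();
/// {
///     let _guard = ProfileGuard::new(&profiler, "load_batch");
///     // ... work measured until the guard is dropped ...
/// }
/// println!("{:?}", profiler.total_time("load_batch"));
/// ```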
pub struct ProfileGuard<'a> {
    profiler: &'a Profiler,
    name: String,
}

impl<'a> ProfileGuard<'a> {
    /// Starts profiling `name` on `profiler`; timing stops when the guard is dropped.
    pub fn new(profiler: &'a Profiler, name: &str) -> Self {
        profiler.start(name);
        Self {
            profiler,
            name: name.to_string(),
        }
    }
}

impl<'a> Drop for ProfileGuard<'a> {
    fn drop(&mut self) {
        self.profiler.stop(&self.name);
    }
}

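/// Profiles the rest of the enclosing scope by binding a [`ProfileGuard`]
/// to a local variable; timing stops automatically when the scope ends.
///
/// # Examples
///
/// A sketch of macro usage (crate name `profiler` assumed):
///
/// ```ignore
/// use profiler::{profile_scope, Profiler};
///
/// let profiler = Profiler::new();
/// {
///     profile_scope!(&profiler, "decode");
///     // ... work measured for the rest of this scope ...
/// }
/// ```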
#[macro_export]
macro_rules! profile_scope {
    ($profiler:expr, $name:expr) => {
        let _guard = $crate::ProfileGuard::new($profiler, $name);
    };
}

static GLOBAL_PROFILER: OnceLock<Profiler> = OnceLock::new();

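/// Returns the process-wide [`Profiler`], initializing it on first access.
///
/// The free functions below ([`start`], [`stop`], [`record_alloc`],
/// [`record_free`]) all delegate to this instance.
///
/// # Examples
///
/// A sketch using the global convenience functions (crate name `profiler`
/// assumed):
///
/// ```ignore
/// profiler::start("optimizer_step");
/// // ... work ...
/// profiler::stop("optimizer_step");
/// profiler::global_profiler().print_summary();
/// ```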
pub fn global_profiler() -> &'static Profiler {
    GLOBAL_PROFILER.get_or_init(Profiler::new)
}

/// Starts timing `name` on the global profiler.
pub fn start(name: &str) {
    global_profiler().start(name);
}

/// Stops timing `name` on the global profiler.
pub fn stop(name: &str) {
    global_profiler().stop(name);
}

/// Records an allocation of `bytes` for `name` on the global profiler.
pub fn record_alloc(name: &str, bytes: usize) {
    global_profiler().record_alloc(name, bytes);
}

/// Records a deallocation of `bytes` for `name` on the global profiler.
pub fn record_free(name: &str, bytes: usize) {
    global_profiler().record_free(name, bytes);
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_profiler_creation() {
        let profiler = Profiler::new();
        assert!(profiler.is_enabled());
    }

    #[test]
    fn test_profile_operation() {
        let profiler = Profiler::new();
        profiler.start("test_op");
        std::thread::sleep(std::time::Duration::from_millis(10));
        profiler.stop("test_op");

        let total = profiler.total_time("test_op");
        assert!(total.as_millis() >= 10);
    }

    #[test]
    fn test_memory_tracking() {
        let profiler = Profiler::new();
        profiler.record_alloc("tensor_a", 1024);
        profiler.record_alloc("tensor_b", 2048);

        assert_eq!(profiler.current_memory(), 3072);
        assert_eq!(profiler.peak_memory(), 3072);

        profiler.record_free("tensor_a", 1024);
        assert_eq!(profiler.current_memory(), 2048);
        assert_eq!(profiler.peak_memory(), 3072);
    }

    #[test]
    fn test_profile_guard() {
        let profiler = Profiler::new();
        {
            let _guard = ProfileGuard::new(&profiler, "scoped_op");
            std::thread::sleep(std::time::Duration::from_millis(5));
        }

        let total = profiler.total_time("scoped_op");
        assert!(total.as_millis() >= 5);
    }

    #[test]
    fn test_reset() {
        let profiler = Profiler::new();
        profiler.start("test");
        profiler.stop("test");
        profiler.record_alloc("mem", 1000);

        profiler.reset();

        assert_eq!(profiler.current_memory(), 0);
        assert_eq!(profiler.total_time("test"), std::time::Duration::ZERO);
    }
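
    // Exercises the `profile_scope!` macro with a local `Profiler`,
    // mirroring `test_profile_guard` above: the guard created by the macro
    // should stop timing when the enclosing scope ends.
    #[test]
    fn test_profile_scope_macro() {
        let profiler = Profiler::new();
        {
            profile_scope!(&profiler, "macro_op");
            std::thread::sleep(std::time::Duration::from_millis(5));
        }

        let total = profiler.total_time("macro_op");
        assert!(total.as_millis() >= 5);
    }

    // Checks that a disabled profiler records nothing; `total_time` for a
    // name with no recorded data is assumed to be zero, consistent with
    // `test_reset` above.
    #[test]
    fn test_disabled_profiler() {
        let mut profiler = Profiler::new();
        profiler.set_enabled(false);
        profiler.start("noop");
        profiler.stop("noop");
        profiler.record_alloc("mem", 512);

        assert_eq!(profiler.current_memory(), 0);
        assert_eq!(profiler.total_time("noop"), std::time::Duration::ZERO);
    }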
}