1use crate::context::{ExecutionHook, ExecutionPhase, ExecutionState};
4use std::collections::HashMap;
5use std::time::{Duration, Instant};
6
7#[derive(Debug, Clone)]
9pub struct OpProfile {
10 pub op_type: String,
11 pub count: usize,
12 pub total_time: Duration,
13 pub avg_time: Duration,
14 pub min_time: Duration,
15 pub max_time: Duration,
16}
17
18impl OpProfile {
19 pub fn new(op_type: impl Into<String>) -> Self {
20 OpProfile {
21 op_type: op_type.into(),
22 count: 0,
23 total_time: Duration::ZERO,
24 avg_time: Duration::ZERO,
25 min_time: Duration::MAX,
26 max_time: Duration::ZERO,
27 }
28 }
29
30 pub fn record(&mut self, duration: Duration) {
31 self.count += 1;
32 self.total_time += duration;
33 self.avg_time = self.total_time / self.count as u32;
34 self.min_time = self.min_time.min(duration);
35 self.max_time = self.max_time.max(duration);
36 }
37}
38
39#[derive(Debug, Clone, Default)]
41pub struct MemoryProfile {
42 pub peak_bytes: usize,
43 pub current_bytes: usize,
44 pub allocations: usize,
45 pub deallocations: usize,
46}
47
48impl MemoryProfile {
49 pub fn new() -> Self {
50 Self::default()
51 }
52
53 pub fn record_allocation(&mut self, bytes: usize) {
54 self.current_bytes += bytes;
55 self.peak_bytes = self.peak_bytes.max(self.current_bytes);
56 self.allocations += 1;
57 }
58
59 pub fn record_deallocation(&mut self, bytes: usize) {
60 self.current_bytes = self.current_bytes.saturating_sub(bytes);
61 self.deallocations += 1;
62 }
63}
64
65#[derive(Debug, Clone)]
67pub struct ProfileData {
68 pub op_profiles: HashMap<String, OpProfile>,
69 pub memory: MemoryProfile,
70 pub total_execution_time: Duration,
71}
72
73impl ProfileData {
74 pub fn new() -> Self {
75 ProfileData {
76 op_profiles: HashMap::new(),
77 memory: MemoryProfile::new(),
78 total_execution_time: Duration::ZERO,
79 }
80 }
81
82 pub fn record_op(&mut self, op_type: impl Into<String>, duration: Duration) {
83 let op_type = op_type.into();
84 self.op_profiles
85 .entry(op_type.clone())
86 .or_insert_with(|| OpProfile::new(op_type))
87 .record(duration);
88 }
89
90 pub fn get_op_profile(&self, op_type: &str) -> Option<&OpProfile> {
91 self.op_profiles.get(op_type)
92 }
93
94 pub fn slowest_ops(&self, limit: usize) -> Vec<(&str, &OpProfile)> {
96 let mut ops: Vec<_> = self
97 .op_profiles
98 .iter()
99 .map(|(name, profile)| (name.as_str(), profile))
100 .collect();
101
102 ops.sort_by(|a, b| b.1.total_time.cmp(&a.1.total_time));
103 ops.truncate(limit);
104 ops
105 }
106
107 pub fn summary(&self) -> String {
109 let mut report = String::new();
110 report.push_str(&format!(
111 "Total Execution Time: {:.3}ms\n",
112 self.total_execution_time.as_secs_f64() * 1000.0
113 ));
114 report.push_str(&format!("Peak Memory: {} bytes\n", self.memory.peak_bytes));
115 report.push_str("\nTop Operations by Time:\n");
116
117 for (name, profile) in self.slowest_ops(5) {
118 report.push_str(&format!(
119 " {}: {} calls, {:.3}ms total, {:.3}ms avg\n",
120 name,
121 profile.count,
122 profile.total_time.as_secs_f64() * 1000.0,
123 profile.avg_time.as_secs_f64() * 1000.0
124 ));
125 }
126
127 report
128 }
129}
130
131impl Default for ProfileData {
132 fn default() -> Self {
133 Self::new()
134 }
135}
136
137pub struct Profiler {
139 data: ProfileData,
140 start_time: Option<Instant>,
141}
142
143impl Profiler {
144 pub fn new() -> Self {
145 Profiler {
146 data: ProfileData::new(),
147 start_time: None,
148 }
149 }
150
151 pub fn start(&mut self) {
152 self.start_time = Some(Instant::now());
153 }
154
155 pub fn stop(&mut self) {
156 if let Some(start) = self.start_time.take() {
157 self.data.total_execution_time = start.elapsed();
158 }
159 }
160
161 pub fn time_op<F, R>(&mut self, op_type: impl Into<String>, f: F) -> R
163 where
164 F: FnOnce() -> R,
165 {
166 let start = Instant::now();
167 let result = f();
168 let duration = start.elapsed();
169 self.data.record_op(op_type, duration);
170 result
171 }
172
173 pub fn record_allocation(&mut self, bytes: usize) {
174 self.data.memory.record_allocation(bytes);
175 }
176
177 pub fn record_deallocation(&mut self, bytes: usize) {
178 self.data.memory.record_deallocation(bytes);
179 }
180
181 pub fn data(&self) -> &ProfileData {
182 &self.data
183 }
184
185 pub fn into_data(mut self) -> ProfileData {
186 self.stop();
187 self.data
188 }
189}
190
191impl Default for Profiler {
192 fn default() -> Self {
193 Self::new()
194 }
195}
196
197pub trait TlProfiledExecutor {
199 fn profiler(&self) -> Option<&Profiler>;
201
202 fn profiler_mut(&mut self) -> Option<&mut Profiler>;
204
205 fn enable_profiling(&mut self);
207
208 fn disable_profiling(&mut self);
210
211 fn get_profile_data(&self) -> Option<&ProfileData> {
213 self.profiler().map(|p| p.data())
214 }
215}
216
217pub struct ProfilerHook {
219 profiler: Profiler,
220 node_timings: HashMap<usize, Instant>,
221}
222
223impl ProfilerHook {
224 pub fn new() -> Self {
225 ProfilerHook {
226 profiler: Profiler::new(),
227 node_timings: HashMap::new(),
228 }
229 }
230
231 pub fn profiler(&self) -> &Profiler {
232 &self.profiler
233 }
234
235 pub fn into_profiler(self) -> Profiler {
236 self.profiler
237 }
238
239 pub fn into_data(self) -> ProfileData {
240 self.profiler.into_data()
241 }
242}
243
244impl Default for ProfilerHook {
245 fn default() -> Self {
246 Self::new()
247 }
248}
249
250impl ExecutionHook for ProfilerHook {
251 fn on_phase_change(&mut self, phase: ExecutionPhase, _state: &ExecutionState) {
252 match phase {
253 ExecutionPhase::Executing => {
254 self.profiler.start();
255 }
256 ExecutionPhase::Completed | ExecutionPhase::Failed | ExecutionPhase::Cancelled => {
257 self.profiler.stop();
258 }
259 _ => {}
260 }
261 }
262
263 fn on_node_start(&mut self, node_idx: usize, _state: &ExecutionState) {
264 self.node_timings.insert(node_idx, Instant::now());
265 }
266
267 fn on_node_complete(&mut self, node_idx: usize, _duration: Duration, state: &ExecutionState) {
268 if let Some(start_time) = self.node_timings.remove(&node_idx) {
269 let duration = start_time.elapsed();
270 self.profiler
272 .data
273 .record_op(format!("node_{}", node_idx), duration);
274
275 if node_idx < state.total_nodes {
277 self.profiler.data.record_op("graph_node", duration);
278 }
279 }
280 }
281
282 fn on_error(&mut self, _error: &str, _state: &ExecutionState) {
283 self.profiler.stop();
285 }
286
287 fn on_complete(&mut self, _state: &ExecutionState) {
288 self.profiler.stop();
290 }
291}
292
293#[derive(Debug, Clone)]
295pub struct TraceEntry {
296 pub node_idx: usize,
297 pub start_time: Duration,
298 pub end_time: Duration,
299 pub duration: Duration,
300 pub op_type: String,
301}
302
303impl TraceEntry {
304 pub fn new(
305 node_idx: usize,
306 start_time: Duration,
307 end_time: Duration,
308 op_type: impl Into<String>,
309 ) -> Self {
310 let duration = end_time - start_time;
311 TraceEntry {
312 node_idx,
313 start_time,
314 end_time,
315 duration,
316 op_type: op_type.into(),
317 }
318 }
319}
320
321pub struct TimelineProfiler {
323 traces: Vec<TraceEntry>,
324 start_instant: Option<Instant>,
325 node_starts: HashMap<usize, (Instant, String)>,
326}
327
328impl TimelineProfiler {
329 pub fn new() -> Self {
330 TimelineProfiler {
331 traces: Vec::new(),
332 start_instant: None,
333 node_starts: HashMap::new(),
334 }
335 }
336
337 pub fn start(&mut self) {
338 self.start_instant = Some(Instant::now());
339 }
340
341 pub fn record_node_start(&mut self, node_idx: usize, op_type: impl Into<String>) {
342 if self.start_instant.is_some() {
343 self.node_starts
344 .insert(node_idx, (Instant::now(), op_type.into()));
345 }
346 }
347
348 pub fn record_node_end(&mut self, node_idx: usize) {
349 if let (Some(start_instant), Some((node_start, op_type))) =
350 (self.start_instant, self.node_starts.remove(&node_idx))
351 {
352 let now = Instant::now();
353 let start_time = node_start.duration_since(start_instant);
354 let end_time = now.duration_since(start_instant);
355
356 self.traces
357 .push(TraceEntry::new(node_idx, start_time, end_time, op_type));
358 }
359 }
360
361 pub fn traces(&self) -> &[TraceEntry] {
362 &self.traces
363 }
364
365 pub fn critical_path_duration(&self) -> Duration {
367 self.traces.iter().map(|t| t.duration).sum()
368 }
369
370 pub fn summary(&self) -> String {
372 let mut report = String::new();
373 let total_duration = self.critical_path_duration();
374
375 report.push_str(&format!(
376 "Total Timeline Duration: {:.3}ms\n",
377 total_duration.as_secs_f64() * 1000.0
378 ));
379 report.push_str(&format!("Traced Nodes: {}\n\n", self.traces.len()));
380
381 report.push_str("Node Timeline:\n");
382 for trace in &self.traces {
383 report.push_str(&format!(
384 " Node {}: {:.3}ms - {:.3}ms ({:.3}ms) - {}\n",
385 trace.node_idx,
386 trace.start_time.as_secs_f64() * 1000.0,
387 trace.end_time.as_secs_f64() * 1000.0,
388 trace.duration.as_secs_f64() * 1000.0,
389 trace.op_type
390 ));
391 }
392
393 report
394 }
395}
396
397impl Default for TimelineProfiler {
398 fn default() -> Self {
399 Self::new()
400 }
401}
402
403#[derive(Debug, Clone)]
405pub struct ProfileStatistics {
406 pub mean: f64,
407 pub median: f64,
408 pub p50: f64,
409 pub p90: f64,
410 pub p95: f64,
411 pub p99: f64,
412 pub std_dev: f64,
413}
414
415impl ProfileStatistics {
416 pub fn from_durations(durations: &mut [Duration]) -> Self {
418 if durations.is_empty() {
419 return ProfileStatistics {
420 mean: 0.0,
421 median: 0.0,
422 p50: 0.0,
423 p90: 0.0,
424 p95: 0.0,
425 p99: 0.0,
426 std_dev: 0.0,
427 };
428 }
429
430 durations.sort();
431
432 let values: Vec<f64> = durations.iter().map(|d| d.as_secs_f64()).collect();
433 let n = values.len();
434
435 let mean = values.iter().sum::<f64>() / n as f64;
437
438 let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n as f64;
440 let std_dev = variance.sqrt();
441
442 let percentile = |p: f64| -> f64 {
444 let index = ((n as f64 * p).ceil() as usize).saturating_sub(1);
445 values[index.min(n - 1)]
446 };
447
448 ProfileStatistics {
449 mean,
450 median: percentile(0.50),
451 p50: percentile(0.50),
452 p90: percentile(0.90),
453 p95: percentile(0.95),
454 p99: percentile(0.99),
455 std_dev,
456 }
457 }
458}
459
460#[derive(Debug, Clone)]
462pub struct PerformanceBaseline {
463 pub op_baselines: HashMap<String, ProfileStatistics>,
464 pub total_time_baseline: Duration,
465}
466
467impl PerformanceBaseline {
468 pub fn new() -> Self {
469 PerformanceBaseline {
470 op_baselines: HashMap::new(),
471 total_time_baseline: Duration::ZERO,
472 }
473 }
474
475 pub fn from_profile_data(data: &ProfileData) -> Self {
476 let mut baseline = PerformanceBaseline::new();
477 baseline.total_time_baseline = data.total_execution_time;
478
479 for (op_type, profile) in &data.op_profiles {
480 let stats = ProfileStatistics {
482 mean: profile.avg_time.as_secs_f64(),
483 median: profile.avg_time.as_secs_f64(),
484 p50: profile.avg_time.as_secs_f64(),
485 p90: profile.max_time.as_secs_f64(),
486 p95: profile.max_time.as_secs_f64(),
487 p99: profile.max_time.as_secs_f64(),
488 std_dev: 0.0,
489 };
490 baseline.op_baselines.insert(op_type.clone(), stats);
491 }
492
493 baseline
494 }
495
496 pub fn compare(&self, data: &ProfileData) -> PerformanceComparison {
498 let mut comparison = PerformanceComparison::new();
499
500 let total_time_ratio = if self.total_time_baseline.as_secs_f64() > 0.0 {
502 data.total_execution_time.as_secs_f64() / self.total_time_baseline.as_secs_f64()
503 } else {
504 1.0
505 };
506
507 comparison.total_time_ratio = total_time_ratio;
508 comparison.is_regression = total_time_ratio > 1.1; for (op_type, profile) in &data.op_profiles {
512 if let Some(baseline_stats) = self.op_baselines.get(op_type) {
513 let current_mean = profile.avg_time.as_secs_f64();
514 let ratio = current_mean / baseline_stats.mean;
515 if ratio > 1.1 {
516 comparison.slow_ops.push((
517 op_type.clone(),
518 ratio,
519 current_mean - baseline_stats.mean,
520 ));
521 }
522 }
523 }
524
525 comparison
526 .slow_ops
527 .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
528
529 comparison
530 }
531}
532
533impl Default for PerformanceBaseline {
534 fn default() -> Self {
535 Self::new()
536 }
537}
538
539#[derive(Debug, Clone)]
541pub struct PerformanceComparison {
542 pub is_regression: bool,
543 pub total_time_ratio: f64,
544 pub slow_ops: Vec<(String, f64, f64)>, }
546
547impl PerformanceComparison {
548 pub fn new() -> Self {
549 PerformanceComparison {
550 is_regression: false,
551 total_time_ratio: 1.0,
552 slow_ops: Vec::new(),
553 }
554 }
555
556 pub fn summary(&self) -> String {
557 let mut report = String::new();
558
559 if self.is_regression {
560 report.push_str("⚠️ PERFORMANCE REGRESSION DETECTED\n\n");
561 } else {
562 report.push_str("✓ Performance within acceptable range\n\n");
563 }
564
565 report.push_str(&format!(
566 "Total Time Ratio: {:.2}x\n",
567 self.total_time_ratio
568 ));
569
570 if !self.slow_ops.is_empty() {
571 report.push_str("\nSlower Operations:\n");
572 for (op_type, ratio, diff) in &self.slow_ops {
573 report.push_str(&format!(
574 " {}: {:.2}x slower (+{:.3}ms)\n",
575 op_type,
576 ratio,
577 diff * 1000.0
578 ));
579 }
580 }
581
582 report
583 }
584}
585
586impl Default for PerformanceComparison {
587 fn default() -> Self {
588 Self::new()
589 }
590}
591
592pub struct BottleneckAnalyzer;
594
595impl BottleneckAnalyzer {
596 pub fn analyze(data: &ProfileData) -> BottleneckReport {
598 let mut report = BottleneckReport::new();
599
600 let total_time = data.total_execution_time.as_secs_f64();
602 if total_time > 0.0 {
603 for (op_type, profile) in &data.op_profiles {
604 let op_time = profile.total_time.as_secs_f64();
605 let percentage = (op_time / total_time) * 100.0;
606
607 if percentage > 10.0 {
608 report.bottlenecks.push(Bottleneck {
609 op_type: op_type.clone(),
610 percentage,
611 total_time: profile.total_time,
612 call_count: profile.count,
613 avg_time: profile.avg_time,
614 });
615 }
616 }
617 }
618
619 report
621 .bottlenecks
622 .sort_by(|a, b| b.percentage.partial_cmp(&a.percentage).unwrap());
623
624 if data.memory.peak_bytes > 1_000_000_000 {
626 report.memory_pressure_high = true;
628 }
629
630 report
631 }
632}
633
634#[derive(Debug, Clone)]
636pub struct Bottleneck {
637 pub op_type: String,
638 pub percentage: f64,
639 pub total_time: Duration,
640 pub call_count: usize,
641 pub avg_time: Duration,
642}
643
644#[derive(Debug, Clone)]
646pub struct BottleneckReport {
647 pub bottlenecks: Vec<Bottleneck>,
648 pub memory_pressure_high: bool,
649}
650
651impl BottleneckReport {
652 pub fn new() -> Self {
653 BottleneckReport {
654 bottlenecks: Vec::new(),
655 memory_pressure_high: false,
656 }
657 }
658
659 pub fn has_bottlenecks(&self) -> bool {
660 !self.bottlenecks.is_empty() || self.memory_pressure_high
661 }
662
663 pub fn summary(&self) -> String {
664 let mut report = String::new();
665
666 if self.bottlenecks.is_empty() && !self.memory_pressure_high {
667 report.push_str("No significant bottlenecks detected\n");
668 return report;
669 }
670
671 report.push_str("Performance Bottlenecks:\n\n");
672
673 for bottleneck in &self.bottlenecks {
674 report.push_str(&format!(
675 "• {} - {:.1}% of total time\n",
676 bottleneck.op_type, bottleneck.percentage
677 ));
678 report.push_str(&format!(
679 " {} calls, {:.3}ms avg, {:.3}ms total\n",
680 bottleneck.call_count,
681 bottleneck.avg_time.as_secs_f64() * 1000.0,
682 bottleneck.total_time.as_secs_f64() * 1000.0
683 ));
684 }
685
686 if self.memory_pressure_high {
687 report.push_str("\n⚠️ High memory pressure detected (>1GB peak)\n");
688 }
689
690 report
691 }
692}
693
694impl Default for BottleneckReport {
695 fn default() -> Self {
696 Self::new()
697 }
698}
699
700#[cfg(test)]
701mod tests {
702 use super::*;
703 use std::thread;
704
705 #[test]
706 fn test_op_profile() {
707 let mut profile = OpProfile::new("einsum");
708 assert_eq!(profile.count, 0);
709
710 profile.record(Duration::from_millis(10));
711 assert_eq!(profile.count, 1);
712 assert!(profile.total_time >= Duration::from_millis(10));
713
714 profile.record(Duration::from_millis(20));
715 assert_eq!(profile.count, 2);
716 }
717
718 #[test]
719 fn test_memory_profile() {
720 let mut mem = MemoryProfile::new();
721 assert_eq!(mem.current_bytes, 0);
722 assert_eq!(mem.peak_bytes, 0);
723
724 mem.record_allocation(1000);
725 assert_eq!(mem.current_bytes, 1000);
726 assert_eq!(mem.peak_bytes, 1000);
727
728 mem.record_allocation(500);
729 assert_eq!(mem.current_bytes, 1500);
730 assert_eq!(mem.peak_bytes, 1500);
731
732 mem.record_deallocation(500);
733 assert_eq!(mem.current_bytes, 1000);
734 assert_eq!(mem.peak_bytes, 1500); }
736
737 #[test]
738 fn test_profile_data() {
739 let mut data = ProfileData::new();
740
741 data.record_op("einsum", Duration::from_millis(100));
742 data.record_op("relu", Duration::from_millis(50));
743 data.record_op("einsum", Duration::from_millis(80));
744
745 let einsum_profile = data.get_op_profile("einsum").unwrap();
746 assert_eq!(einsum_profile.count, 2);
747
748 let slowest = data.slowest_ops(2);
749 assert_eq!(slowest.len(), 2);
750 assert_eq!(slowest[0].0, "einsum"); }
752
753 #[test]
754 fn test_profiler() {
755 let mut profiler = Profiler::new();
756
757 profiler.start();
758
759 profiler.time_op("operation1", || {
761 thread::sleep(Duration::from_millis(1));
762 });
763
764 profiler.time_op("operation2", || {
765 thread::sleep(Duration::from_millis(2));
766 });
767
768 profiler.stop();
769
770 let data = profiler.data();
771 assert_eq!(data.op_profiles.len(), 2);
772 assert!(data.total_execution_time >= Duration::from_millis(3));
773 }
774
775 #[test]
776 fn test_profile_summary() {
777 let mut data = ProfileData::new();
778 data.record_op("einsum", Duration::from_millis(100));
779 data.record_op("relu", Duration::from_millis(50));
780 data.total_execution_time = Duration::from_millis(150);
781 data.memory.record_allocation(1024);
782
783 let summary = data.summary();
784 assert!(summary.contains("150"));
785 assert!(summary.contains("1024"));
786 assert!(summary.contains("einsum"));
787 }
788
789 #[test]
790 fn test_profiler_hook_creation() {
791 let hook = ProfilerHook::new();
792 assert_eq!(hook.profiler().data().op_profiles.len(), 0);
793 }
794
795 #[test]
796 fn test_profiler_hook_with_execution() {
797 use crate::context::ExecutionState;
798
799 let mut hook = ProfilerHook::new();
800 let state = ExecutionState::new(5);
801
802 hook.on_phase_change(ExecutionPhase::Executing, &state);
804 hook.on_node_start(0, &state);
805 thread::sleep(Duration::from_millis(1));
806 hook.on_node_complete(0, Duration::from_millis(1), &state);
807 hook.on_phase_change(ExecutionPhase::Completed, &state);
808
809 let data = hook.profiler().data();
810 assert!(!data.op_profiles.is_empty());
811 assert!(data.total_execution_time > Duration::ZERO);
812 }
813
814 #[test]
815 fn test_timeline_profiler() {
816 let mut profiler = TimelineProfiler::new();
817 profiler.start();
818
819 profiler.record_node_start(0, "einsum");
820 thread::sleep(Duration::from_millis(1));
821 profiler.record_node_end(0);
822
823 profiler.record_node_start(1, "relu");
824 thread::sleep(Duration::from_millis(1));
825 profiler.record_node_end(1);
826
827 let traces = profiler.traces();
828 assert_eq!(traces.len(), 2);
829 assert_eq!(traces[0].node_idx, 0);
830 assert_eq!(traces[1].node_idx, 1);
831 }
832
833 #[test]
834 fn test_timeline_summary() {
835 let mut profiler = TimelineProfiler::new();
836 profiler.start();
837
838 profiler.record_node_start(0, "einsum");
839 thread::sleep(Duration::from_millis(1));
840 profiler.record_node_end(0);
841
842 let summary = profiler.summary();
843 assert!(summary.contains("Node 0"));
844 assert!(summary.contains("einsum"));
845 }
846
847 #[test]
848 fn test_trace_entry() {
849 let entry = TraceEntry::new(
850 0,
851 Duration::from_millis(0),
852 Duration::from_millis(100),
853 "einsum",
854 );
855
856 assert_eq!(entry.node_idx, 0);
857 assert_eq!(entry.duration, Duration::from_millis(100));
858 assert_eq!(entry.op_type, "einsum");
859 }
860
861 #[test]
862 fn test_profile_statistics_empty() {
863 let stats = ProfileStatistics::from_durations(&mut []);
864 assert_eq!(stats.mean, 0.0);
865 assert_eq!(stats.median, 0.0);
866 }
867
868 #[test]
869 fn test_profile_statistics() {
870 let mut durations = vec![
871 Duration::from_millis(10),
872 Duration::from_millis(20),
873 Duration::from_millis(30),
874 Duration::from_millis(40),
875 Duration::from_millis(50),
876 ];
877
878 let stats = ProfileStatistics::from_durations(&mut durations);
879 assert!(stats.mean > 0.0);
880 assert!(stats.p50 > 0.0);
881 assert!(stats.p90 > 0.0);
882 assert!(stats.p95 > 0.0);
883 assert!(stats.p99 > 0.0);
884 assert!(stats.p99 >= stats.p95);
885 assert!(stats.p95 >= stats.p90);
886 assert!(stats.p90 >= stats.p50);
887 }
888
889 #[test]
890 fn test_performance_baseline_creation() {
891 let mut data = ProfileData::new();
892 data.record_op("einsum", Duration::from_millis(100));
893 data.record_op("relu", Duration::from_millis(50));
894 data.total_execution_time = Duration::from_millis(150);
895
896 let baseline = PerformanceBaseline::from_profile_data(&data);
897 assert_eq!(baseline.op_baselines.len(), 2);
898 assert!(baseline.op_baselines.contains_key("einsum"));
899 assert!(baseline.op_baselines.contains_key("relu"));
900 }
901
902 #[test]
903 fn test_performance_comparison_no_regression() {
904 let mut baseline_data = ProfileData::new();
905 baseline_data.record_op("einsum", Duration::from_millis(100));
906 baseline_data.total_execution_time = Duration::from_millis(100);
907
908 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
909
910 let mut current_data = ProfileData::new();
911 current_data.record_op("einsum", Duration::from_millis(105)); current_data.total_execution_time = Duration::from_millis(105);
913
914 let comparison = baseline.compare(¤t_data);
915 assert!(!comparison.is_regression); }
917
918 #[test]
919 fn test_performance_comparison_with_regression() {
920 let mut baseline_data = ProfileData::new();
921 baseline_data.record_op("einsum", Duration::from_millis(100));
922 baseline_data.total_execution_time = Duration::from_millis(100);
923
924 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
925
926 let mut current_data = ProfileData::new();
927 current_data.record_op("einsum", Duration::from_millis(120)); current_data.total_execution_time = Duration::from_millis(120);
929
930 let comparison = baseline.compare(¤t_data);
931 assert!(comparison.is_regression);
932 assert!(comparison.total_time_ratio > 1.1);
933 }
934
935 #[test]
936 fn test_performance_comparison_summary() {
937 let mut baseline_data = ProfileData::new();
938 baseline_data.record_op("einsum", Duration::from_millis(100));
939 baseline_data.total_execution_time = Duration::from_millis(100);
940
941 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
942
943 let mut current_data = ProfileData::new();
944 current_data.record_op("einsum", Duration::from_millis(150));
945 current_data.total_execution_time = Duration::from_millis(150);
946
947 let comparison = baseline.compare(¤t_data);
948 let summary = comparison.summary();
949 assert!(summary.contains("REGRESSION") || summary.contains("1."));
950 }
951
952 #[test]
953 fn test_bottleneck_analyzer_no_bottlenecks() {
954 let mut data = ProfileData::new();
955 data.record_op("op1", Duration::from_millis(5));
956 data.record_op("op2", Duration::from_millis(5));
957 data.total_execution_time = Duration::from_millis(100);
958
959 let report = BottleneckAnalyzer::analyze(&data);
960 assert!(report.bottlenecks.is_empty());
961 assert!(!report.has_bottlenecks());
962 }
963
964 #[test]
965 fn test_bottleneck_analyzer_with_bottleneck() {
966 let mut data = ProfileData::new();
967 data.record_op("slow_op", Duration::from_millis(50));
968 data.record_op("fast_op", Duration::from_millis(5));
969 data.total_execution_time = Duration::from_millis(100);
970
971 let report = BottleneckAnalyzer::analyze(&data);
972 assert!(!report.bottlenecks.is_empty());
973 assert_eq!(report.bottlenecks[0].op_type, "slow_op");
974 assert!(report.bottlenecks[0].percentage > 10.0);
975 }
976
977 #[test]
978 fn test_bottleneck_analyzer_memory_pressure() {
979 let mut data = ProfileData::new();
980 data.total_execution_time = Duration::from_millis(100);
981 data.memory.record_allocation(2_000_000_000); let report = BottleneckAnalyzer::analyze(&data);
984 assert!(report.memory_pressure_high);
985 assert!(report.has_bottlenecks());
986 }
987
988 #[test]
989 fn test_bottleneck_report_summary() {
990 let mut data = ProfileData::new();
991 data.record_op("slow_op", Duration::from_millis(60));
992 data.total_execution_time = Duration::from_millis(100);
993
994 let report = BottleneckAnalyzer::analyze(&data);
995 let summary = report.summary();
996 assert!(summary.contains("slow_op"));
997 assert!(summary.contains("%"));
998 }
999
1000 #[test]
1001 fn test_bottleneck_report_summary_no_issues() {
1002 let report = BottleneckReport::new();
1003 let summary = report.summary();
1004 assert!(summary.contains("No significant bottlenecks"));
1005 }
1006}