1use crate::context::{ExecutionHook, ExecutionPhase, ExecutionState};
4use std::cmp::Reverse;
5use std::collections::HashMap;
6use std::time::{Duration, Instant};
7
8#[derive(Debug, Clone)]
10pub struct OpProfile {
11 pub op_type: String,
12 pub count: usize,
13 pub total_time: Duration,
14 pub avg_time: Duration,
15 pub min_time: Duration,
16 pub max_time: Duration,
17}
18
19impl OpProfile {
20 pub fn new(op_type: impl Into<String>) -> Self {
21 OpProfile {
22 op_type: op_type.into(),
23 count: 0,
24 total_time: Duration::ZERO,
25 avg_time: Duration::ZERO,
26 min_time: Duration::MAX,
27 max_time: Duration::ZERO,
28 }
29 }
30
31 pub fn record(&mut self, duration: Duration) {
32 self.count += 1;
33 self.total_time += duration;
34 self.avg_time = self.total_time / self.count as u32;
35 self.min_time = self.min_time.min(duration);
36 self.max_time = self.max_time.max(duration);
37 }
38}
39
40#[derive(Debug, Clone, Default)]
42pub struct MemoryProfile {
43 pub peak_bytes: usize,
44 pub current_bytes: usize,
45 pub allocations: usize,
46 pub deallocations: usize,
47}
48
49impl MemoryProfile {
50 pub fn new() -> Self {
51 Self::default()
52 }
53
54 pub fn record_allocation(&mut self, bytes: usize) {
55 self.current_bytes += bytes;
56 self.peak_bytes = self.peak_bytes.max(self.current_bytes);
57 self.allocations += 1;
58 }
59
60 pub fn record_deallocation(&mut self, bytes: usize) {
61 self.current_bytes = self.current_bytes.saturating_sub(bytes);
62 self.deallocations += 1;
63 }
64}
65
66#[derive(Debug, Clone)]
68pub struct ProfileData {
69 pub op_profiles: HashMap<String, OpProfile>,
70 pub memory: MemoryProfile,
71 pub total_execution_time: Duration,
72}
73
74impl ProfileData {
75 pub fn new() -> Self {
76 ProfileData {
77 op_profiles: HashMap::new(),
78 memory: MemoryProfile::new(),
79 total_execution_time: Duration::ZERO,
80 }
81 }
82
83 pub fn record_op(&mut self, op_type: impl Into<String>, duration: Duration) {
84 let op_type = op_type.into();
85 self.op_profiles
86 .entry(op_type.clone())
87 .or_insert_with(|| OpProfile::new(op_type))
88 .record(duration);
89 }
90
91 pub fn get_op_profile(&self, op_type: &str) -> Option<&OpProfile> {
92 self.op_profiles.get(op_type)
93 }
94
95 pub fn slowest_ops(&self, limit: usize) -> Vec<(&str, &OpProfile)> {
97 let mut ops: Vec<_> = self
98 .op_profiles
99 .iter()
100 .map(|(name, profile)| (name.as_str(), profile))
101 .collect();
102
103 ops.sort_by_key(|b| Reverse(b.1.total_time));
104 ops.truncate(limit);
105 ops
106 }
107
108 pub fn summary(&self) -> String {
110 let mut report = String::new();
111 report.push_str(&format!(
112 "Total Execution Time: {:.3}ms\n",
113 self.total_execution_time.as_secs_f64() * 1000.0
114 ));
115 report.push_str(&format!("Peak Memory: {} bytes\n", self.memory.peak_bytes));
116 report.push_str("\nTop Operations by Time:\n");
117
118 for (name, profile) in self.slowest_ops(5) {
119 report.push_str(&format!(
120 " {}: {} calls, {:.3}ms total, {:.3}ms avg\n",
121 name,
122 profile.count,
123 profile.total_time.as_secs_f64() * 1000.0,
124 profile.avg_time.as_secs_f64() * 1000.0
125 ));
126 }
127
128 report
129 }
130}
131
132impl Default for ProfileData {
133 fn default() -> Self {
134 Self::new()
135 }
136}
137
138pub struct Profiler {
140 data: ProfileData,
141 start_time: Option<Instant>,
142}
143
144impl Profiler {
145 pub fn new() -> Self {
146 Profiler {
147 data: ProfileData::new(),
148 start_time: None,
149 }
150 }
151
152 pub fn start(&mut self) {
153 self.start_time = Some(Instant::now());
154 }
155
156 pub fn stop(&mut self) {
157 if let Some(start) = self.start_time.take() {
158 self.data.total_execution_time = start.elapsed();
159 }
160 }
161
162 pub fn time_op<F, R>(&mut self, op_type: impl Into<String>, f: F) -> R
164 where
165 F: FnOnce() -> R,
166 {
167 let start = Instant::now();
168 let result = f();
169 let duration = start.elapsed();
170 self.data.record_op(op_type, duration);
171 result
172 }
173
174 pub fn record_allocation(&mut self, bytes: usize) {
175 self.data.memory.record_allocation(bytes);
176 }
177
178 pub fn record_deallocation(&mut self, bytes: usize) {
179 self.data.memory.record_deallocation(bytes);
180 }
181
182 pub fn data(&self) -> &ProfileData {
183 &self.data
184 }
185
186 pub fn into_data(mut self) -> ProfileData {
187 self.stop();
188 self.data
189 }
190}
191
192impl Default for Profiler {
193 fn default() -> Self {
194 Self::new()
195 }
196}
197
198pub trait TlProfiledExecutor {
200 fn profiler(&self) -> Option<&Profiler>;
202
203 fn profiler_mut(&mut self) -> Option<&mut Profiler>;
205
206 fn enable_profiling(&mut self);
208
209 fn disable_profiling(&mut self);
211
212 fn get_profile_data(&self) -> Option<&ProfileData> {
214 self.profiler().map(|p| p.data())
215 }
216}
217
218pub struct ProfilerHook {
220 profiler: Profiler,
221 node_timings: HashMap<usize, Instant>,
222}
223
224impl ProfilerHook {
225 pub fn new() -> Self {
226 ProfilerHook {
227 profiler: Profiler::new(),
228 node_timings: HashMap::new(),
229 }
230 }
231
232 pub fn profiler(&self) -> &Profiler {
233 &self.profiler
234 }
235
236 pub fn into_profiler(self) -> Profiler {
237 self.profiler
238 }
239
240 pub fn into_data(self) -> ProfileData {
241 self.profiler.into_data()
242 }
243}
244
245impl Default for ProfilerHook {
246 fn default() -> Self {
247 Self::new()
248 }
249}
250
251impl ExecutionHook for ProfilerHook {
252 fn on_phase_change(&mut self, phase: ExecutionPhase, _state: &ExecutionState) {
253 match phase {
254 ExecutionPhase::Executing => {
255 self.profiler.start();
256 }
257 ExecutionPhase::Completed | ExecutionPhase::Failed | ExecutionPhase::Cancelled => {
258 self.profiler.stop();
259 }
260 _ => {}
261 }
262 }
263
264 fn on_node_start(&mut self, node_idx: usize, _state: &ExecutionState) {
265 self.node_timings.insert(node_idx, Instant::now());
266 }
267
268 fn on_node_complete(&mut self, node_idx: usize, _duration: Duration, state: &ExecutionState) {
269 if let Some(start_time) = self.node_timings.remove(&node_idx) {
270 let duration = start_time.elapsed();
271 self.profiler
273 .data
274 .record_op(format!("node_{}", node_idx), duration);
275
276 if node_idx < state.total_nodes {
278 self.profiler.data.record_op("graph_node", duration);
279 }
280 }
281 }
282
283 fn on_error(&mut self, _error: &str, _state: &ExecutionState) {
284 self.profiler.stop();
286 }
287
288 fn on_complete(&mut self, _state: &ExecutionState) {
289 self.profiler.stop();
291 }
292}
293
294#[derive(Debug, Clone)]
296pub struct TraceEntry {
297 pub node_idx: usize,
298 pub start_time: Duration,
299 pub end_time: Duration,
300 pub duration: Duration,
301 pub op_type: String,
302}
303
304impl TraceEntry {
305 pub fn new(
306 node_idx: usize,
307 start_time: Duration,
308 end_time: Duration,
309 op_type: impl Into<String>,
310 ) -> Self {
311 let duration = end_time - start_time;
312 TraceEntry {
313 node_idx,
314 start_time,
315 end_time,
316 duration,
317 op_type: op_type.into(),
318 }
319 }
320}
321
322pub struct TimelineProfiler {
324 traces: Vec<TraceEntry>,
325 start_instant: Option<Instant>,
326 node_starts: HashMap<usize, (Instant, String)>,
327}
328
329impl TimelineProfiler {
330 pub fn new() -> Self {
331 TimelineProfiler {
332 traces: Vec::new(),
333 start_instant: None,
334 node_starts: HashMap::new(),
335 }
336 }
337
338 pub fn start(&mut self) {
339 self.start_instant = Some(Instant::now());
340 }
341
342 pub fn record_node_start(&mut self, node_idx: usize, op_type: impl Into<String>) {
343 if self.start_instant.is_some() {
344 self.node_starts
345 .insert(node_idx, (Instant::now(), op_type.into()));
346 }
347 }
348
349 pub fn record_node_end(&mut self, node_idx: usize) {
350 if let (Some(start_instant), Some((node_start, op_type))) =
351 (self.start_instant, self.node_starts.remove(&node_idx))
352 {
353 let now = Instant::now();
354 let start_time = node_start.duration_since(start_instant);
355 let end_time = now.duration_since(start_instant);
356
357 self.traces
358 .push(TraceEntry::new(node_idx, start_time, end_time, op_type));
359 }
360 }
361
362 pub fn traces(&self) -> &[TraceEntry] {
363 &self.traces
364 }
365
366 pub fn critical_path_duration(&self) -> Duration {
368 self.traces.iter().map(|t| t.duration).sum()
369 }
370
371 pub fn summary(&self) -> String {
373 let mut report = String::new();
374 let total_duration = self.critical_path_duration();
375
376 report.push_str(&format!(
377 "Total Timeline Duration: {:.3}ms\n",
378 total_duration.as_secs_f64() * 1000.0
379 ));
380 report.push_str(&format!("Traced Nodes: {}\n\n", self.traces.len()));
381
382 report.push_str("Node Timeline:\n");
383 for trace in &self.traces {
384 report.push_str(&format!(
385 " Node {}: {:.3}ms - {:.3}ms ({:.3}ms) - {}\n",
386 trace.node_idx,
387 trace.start_time.as_secs_f64() * 1000.0,
388 trace.end_time.as_secs_f64() * 1000.0,
389 trace.duration.as_secs_f64() * 1000.0,
390 trace.op_type
391 ));
392 }
393
394 report
395 }
396}
397
398impl Default for TimelineProfiler {
399 fn default() -> Self {
400 Self::new()
401 }
402}
403
404#[derive(Debug, Clone)]
406pub struct ProfileStatistics {
407 pub mean: f64,
408 pub median: f64,
409 pub p50: f64,
410 pub p90: f64,
411 pub p95: f64,
412 pub p99: f64,
413 pub std_dev: f64,
414}
415
416impl ProfileStatistics {
417 pub fn from_durations(durations: &mut [Duration]) -> Self {
419 if durations.is_empty() {
420 return ProfileStatistics {
421 mean: 0.0,
422 median: 0.0,
423 p50: 0.0,
424 p90: 0.0,
425 p95: 0.0,
426 p99: 0.0,
427 std_dev: 0.0,
428 };
429 }
430
431 durations.sort();
432
433 let values: Vec<f64> = durations.iter().map(|d| d.as_secs_f64()).collect();
434 let n = values.len();
435
436 let mean = values.iter().sum::<f64>() / n as f64;
438
439 let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n as f64;
441 let std_dev = variance.sqrt();
442
443 let percentile = |p: f64| -> f64 {
445 let index = ((n as f64 * p).ceil() as usize).saturating_sub(1);
446 values[index.min(n - 1)]
447 };
448
449 ProfileStatistics {
450 mean,
451 median: percentile(0.50),
452 p50: percentile(0.50),
453 p90: percentile(0.90),
454 p95: percentile(0.95),
455 p99: percentile(0.99),
456 std_dev,
457 }
458 }
459}
460
461#[derive(Debug, Clone)]
463pub struct PerformanceBaseline {
464 pub op_baselines: HashMap<String, ProfileStatistics>,
465 pub total_time_baseline: Duration,
466}
467
468impl PerformanceBaseline {
469 pub fn new() -> Self {
470 PerformanceBaseline {
471 op_baselines: HashMap::new(),
472 total_time_baseline: Duration::ZERO,
473 }
474 }
475
476 pub fn from_profile_data(data: &ProfileData) -> Self {
477 let mut baseline = PerformanceBaseline::new();
478 baseline.total_time_baseline = data.total_execution_time;
479
480 for (op_type, profile) in &data.op_profiles {
481 let stats = ProfileStatistics {
483 mean: profile.avg_time.as_secs_f64(),
484 median: profile.avg_time.as_secs_f64(),
485 p50: profile.avg_time.as_secs_f64(),
486 p90: profile.max_time.as_secs_f64(),
487 p95: profile.max_time.as_secs_f64(),
488 p99: profile.max_time.as_secs_f64(),
489 std_dev: 0.0,
490 };
491 baseline.op_baselines.insert(op_type.clone(), stats);
492 }
493
494 baseline
495 }
496
497 pub fn compare(&self, data: &ProfileData) -> PerformanceComparison {
499 let mut comparison = PerformanceComparison::new();
500
501 let total_time_ratio = if self.total_time_baseline.as_secs_f64() > 0.0 {
503 data.total_execution_time.as_secs_f64() / self.total_time_baseline.as_secs_f64()
504 } else {
505 1.0
506 };
507
508 comparison.total_time_ratio = total_time_ratio;
509 comparison.is_regression = total_time_ratio > 1.1; for (op_type, profile) in &data.op_profiles {
513 if let Some(baseline_stats) = self.op_baselines.get(op_type) {
514 let current_mean = profile.avg_time.as_secs_f64();
515 let ratio = current_mean / baseline_stats.mean;
516 if ratio > 1.1 {
517 comparison.slow_ops.push((
518 op_type.clone(),
519 ratio,
520 current_mean - baseline_stats.mean,
521 ));
522 }
523 }
524 }
525
526 comparison
527 .slow_ops
528 .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
529
530 comparison
531 }
532}
533
534impl Default for PerformanceBaseline {
535 fn default() -> Self {
536 Self::new()
537 }
538}
539
540#[derive(Debug, Clone)]
542pub struct PerformanceComparison {
543 pub is_regression: bool,
544 pub total_time_ratio: f64,
545 pub slow_ops: Vec<(String, f64, f64)>, }
547
548impl PerformanceComparison {
549 pub fn new() -> Self {
550 PerformanceComparison {
551 is_regression: false,
552 total_time_ratio: 1.0,
553 slow_ops: Vec::new(),
554 }
555 }
556
557 pub fn summary(&self) -> String {
558 let mut report = String::new();
559
560 if self.is_regression {
561 report.push_str("⚠️ PERFORMANCE REGRESSION DETECTED\n\n");
562 } else {
563 report.push_str("✓ Performance within acceptable range\n\n");
564 }
565
566 report.push_str(&format!(
567 "Total Time Ratio: {:.2}x\n",
568 self.total_time_ratio
569 ));
570
571 if !self.slow_ops.is_empty() {
572 report.push_str("\nSlower Operations:\n");
573 for (op_type, ratio, diff) in &self.slow_ops {
574 report.push_str(&format!(
575 " {}: {:.2}x slower (+{:.3}ms)\n",
576 op_type,
577 ratio,
578 diff * 1000.0
579 ));
580 }
581 }
582
583 report
584 }
585}
586
587impl Default for PerformanceComparison {
588 fn default() -> Self {
589 Self::new()
590 }
591}
592
593pub struct BottleneckAnalyzer;
595
596impl BottleneckAnalyzer {
597 pub fn analyze(data: &ProfileData) -> BottleneckReport {
599 let mut report = BottleneckReport::new();
600
601 let total_time = data.total_execution_time.as_secs_f64();
603 if total_time > 0.0 {
604 for (op_type, profile) in &data.op_profiles {
605 let op_time = profile.total_time.as_secs_f64();
606 let percentage = (op_time / total_time) * 100.0;
607
608 if percentage > 10.0 {
609 report.bottlenecks.push(Bottleneck {
610 op_type: op_type.clone(),
611 percentage,
612 total_time: profile.total_time,
613 call_count: profile.count,
614 avg_time: profile.avg_time,
615 });
616 }
617 }
618 }
619
620 report.bottlenecks.sort_by(|a, b| {
622 b.percentage
623 .partial_cmp(&a.percentage)
624 .unwrap_or(std::cmp::Ordering::Equal)
625 });
626
627 if data.memory.peak_bytes > 1_000_000_000 {
629 report.memory_pressure_high = true;
631 }
632
633 report
634 }
635}
636
637#[derive(Debug, Clone)]
639pub struct Bottleneck {
640 pub op_type: String,
641 pub percentage: f64,
642 pub total_time: Duration,
643 pub call_count: usize,
644 pub avg_time: Duration,
645}
646
647#[derive(Debug, Clone)]
649pub struct BottleneckReport {
650 pub bottlenecks: Vec<Bottleneck>,
651 pub memory_pressure_high: bool,
652}
653
654impl BottleneckReport {
655 pub fn new() -> Self {
656 BottleneckReport {
657 bottlenecks: Vec::new(),
658 memory_pressure_high: false,
659 }
660 }
661
662 pub fn has_bottlenecks(&self) -> bool {
663 !self.bottlenecks.is_empty() || self.memory_pressure_high
664 }
665
666 pub fn summary(&self) -> String {
667 let mut report = String::new();
668
669 if self.bottlenecks.is_empty() && !self.memory_pressure_high {
670 report.push_str("No significant bottlenecks detected\n");
671 return report;
672 }
673
674 report.push_str("Performance Bottlenecks:\n\n");
675
676 for bottleneck in &self.bottlenecks {
677 report.push_str(&format!(
678 "• {} - {:.1}% of total time\n",
679 bottleneck.op_type, bottleneck.percentage
680 ));
681 report.push_str(&format!(
682 " {} calls, {:.3}ms avg, {:.3}ms total\n",
683 bottleneck.call_count,
684 bottleneck.avg_time.as_secs_f64() * 1000.0,
685 bottleneck.total_time.as_secs_f64() * 1000.0
686 ));
687 }
688
689 if self.memory_pressure_high {
690 report.push_str("\n⚠️ High memory pressure detected (>1GB peak)\n");
691 }
692
693 report
694 }
695}
696
697impl Default for BottleneckReport {
698 fn default() -> Self {
699 Self::new()
700 }
701}
702
703#[cfg(test)]
704mod tests {
705 use super::*;
706 use std::thread;
707
708 #[test]
709 fn test_op_profile() {
710 let mut profile = OpProfile::new("einsum");
711 assert_eq!(profile.count, 0);
712
713 profile.record(Duration::from_millis(10));
714 assert_eq!(profile.count, 1);
715 assert!(profile.total_time >= Duration::from_millis(10));
716
717 profile.record(Duration::from_millis(20));
718 assert_eq!(profile.count, 2);
719 }
720
721 #[test]
722 fn test_memory_profile() {
723 let mut mem = MemoryProfile::new();
724 assert_eq!(mem.current_bytes, 0);
725 assert_eq!(mem.peak_bytes, 0);
726
727 mem.record_allocation(1000);
728 assert_eq!(mem.current_bytes, 1000);
729 assert_eq!(mem.peak_bytes, 1000);
730
731 mem.record_allocation(500);
732 assert_eq!(mem.current_bytes, 1500);
733 assert_eq!(mem.peak_bytes, 1500);
734
735 mem.record_deallocation(500);
736 assert_eq!(mem.current_bytes, 1000);
737 assert_eq!(mem.peak_bytes, 1500); }
739
740 #[test]
741 fn test_profile_data() {
742 let mut data = ProfileData::new();
743
744 data.record_op("einsum", Duration::from_millis(100));
745 data.record_op("relu", Duration::from_millis(50));
746 data.record_op("einsum", Duration::from_millis(80));
747
748 let einsum_profile = data.get_op_profile("einsum").expect("unwrap");
749 assert_eq!(einsum_profile.count, 2);
750
751 let slowest = data.slowest_ops(2);
752 assert_eq!(slowest.len(), 2);
753 assert_eq!(slowest[0].0, "einsum"); }
755
756 #[test]
757 fn test_profiler() {
758 let mut profiler = Profiler::new();
759
760 profiler.start();
761
762 profiler.time_op("operation1", || {
764 thread::sleep(Duration::from_millis(1));
765 });
766
767 profiler.time_op("operation2", || {
768 thread::sleep(Duration::from_millis(2));
769 });
770
771 profiler.stop();
772
773 let data = profiler.data();
774 assert_eq!(data.op_profiles.len(), 2);
775 assert!(data.total_execution_time >= Duration::from_millis(3));
776 }
777
778 #[test]
779 fn test_profile_summary() {
780 let mut data = ProfileData::new();
781 data.record_op("einsum", Duration::from_millis(100));
782 data.record_op("relu", Duration::from_millis(50));
783 data.total_execution_time = Duration::from_millis(150);
784 data.memory.record_allocation(1024);
785
786 let summary = data.summary();
787 assert!(summary.contains("150"));
788 assert!(summary.contains("1024"));
789 assert!(summary.contains("einsum"));
790 }
791
792 #[test]
793 fn test_profiler_hook_creation() {
794 let hook = ProfilerHook::new();
795 assert_eq!(hook.profiler().data().op_profiles.len(), 0);
796 }
797
798 #[test]
799 fn test_profiler_hook_with_execution() {
800 use crate::context::ExecutionState;
801
802 let mut hook = ProfilerHook::new();
803 let state = ExecutionState::new(5);
804
805 hook.on_phase_change(ExecutionPhase::Executing, &state);
807 hook.on_node_start(0, &state);
808 thread::sleep(Duration::from_millis(1));
809 hook.on_node_complete(0, Duration::from_millis(1), &state);
810 hook.on_phase_change(ExecutionPhase::Completed, &state);
811
812 let data = hook.profiler().data();
813 assert!(!data.op_profiles.is_empty());
814 assert!(data.total_execution_time > Duration::ZERO);
815 }
816
817 #[test]
818 fn test_timeline_profiler() {
819 let mut profiler = TimelineProfiler::new();
820 profiler.start();
821
822 profiler.record_node_start(0, "einsum");
823 thread::sleep(Duration::from_millis(1));
824 profiler.record_node_end(0);
825
826 profiler.record_node_start(1, "relu");
827 thread::sleep(Duration::from_millis(1));
828 profiler.record_node_end(1);
829
830 let traces = profiler.traces();
831 assert_eq!(traces.len(), 2);
832 assert_eq!(traces[0].node_idx, 0);
833 assert_eq!(traces[1].node_idx, 1);
834 }
835
836 #[test]
837 fn test_timeline_summary() {
838 let mut profiler = TimelineProfiler::new();
839 profiler.start();
840
841 profiler.record_node_start(0, "einsum");
842 thread::sleep(Duration::from_millis(1));
843 profiler.record_node_end(0);
844
845 let summary = profiler.summary();
846 assert!(summary.contains("Node 0"));
847 assert!(summary.contains("einsum"));
848 }
849
850 #[test]
851 fn test_trace_entry() {
852 let entry = TraceEntry::new(
853 0,
854 Duration::from_millis(0),
855 Duration::from_millis(100),
856 "einsum",
857 );
858
859 assert_eq!(entry.node_idx, 0);
860 assert_eq!(entry.duration, Duration::from_millis(100));
861 assert_eq!(entry.op_type, "einsum");
862 }
863
864 #[test]
865 fn test_profile_statistics_empty() {
866 let stats = ProfileStatistics::from_durations(&mut []);
867 assert_eq!(stats.mean, 0.0);
868 assert_eq!(stats.median, 0.0);
869 }
870
871 #[test]
872 fn test_profile_statistics() {
873 let mut durations = vec![
874 Duration::from_millis(10),
875 Duration::from_millis(20),
876 Duration::from_millis(30),
877 Duration::from_millis(40),
878 Duration::from_millis(50),
879 ];
880
881 let stats = ProfileStatistics::from_durations(&mut durations);
882 assert!(stats.mean > 0.0);
883 assert!(stats.p50 > 0.0);
884 assert!(stats.p90 > 0.0);
885 assert!(stats.p95 > 0.0);
886 assert!(stats.p99 > 0.0);
887 assert!(stats.p99 >= stats.p95);
888 assert!(stats.p95 >= stats.p90);
889 assert!(stats.p90 >= stats.p50);
890 }
891
892 #[test]
893 fn test_performance_baseline_creation() {
894 let mut data = ProfileData::new();
895 data.record_op("einsum", Duration::from_millis(100));
896 data.record_op("relu", Duration::from_millis(50));
897 data.total_execution_time = Duration::from_millis(150);
898
899 let baseline = PerformanceBaseline::from_profile_data(&data);
900 assert_eq!(baseline.op_baselines.len(), 2);
901 assert!(baseline.op_baselines.contains_key("einsum"));
902 assert!(baseline.op_baselines.contains_key("relu"));
903 }
904
905 #[test]
906 fn test_performance_comparison_no_regression() {
907 let mut baseline_data = ProfileData::new();
908 baseline_data.record_op("einsum", Duration::from_millis(100));
909 baseline_data.total_execution_time = Duration::from_millis(100);
910
911 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
912
913 let mut current_data = ProfileData::new();
914 current_data.record_op("einsum", Duration::from_millis(105)); current_data.total_execution_time = Duration::from_millis(105);
916
917 let comparison = baseline.compare(¤t_data);
918 assert!(!comparison.is_regression); }
920
921 #[test]
922 fn test_performance_comparison_with_regression() {
923 let mut baseline_data = ProfileData::new();
924 baseline_data.record_op("einsum", Duration::from_millis(100));
925 baseline_data.total_execution_time = Duration::from_millis(100);
926
927 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
928
929 let mut current_data = ProfileData::new();
930 current_data.record_op("einsum", Duration::from_millis(120)); current_data.total_execution_time = Duration::from_millis(120);
932
933 let comparison = baseline.compare(¤t_data);
934 assert!(comparison.is_regression);
935 assert!(comparison.total_time_ratio > 1.1);
936 }
937
938 #[test]
939 fn test_performance_comparison_summary() {
940 let mut baseline_data = ProfileData::new();
941 baseline_data.record_op("einsum", Duration::from_millis(100));
942 baseline_data.total_execution_time = Duration::from_millis(100);
943
944 let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
945
946 let mut current_data = ProfileData::new();
947 current_data.record_op("einsum", Duration::from_millis(150));
948 current_data.total_execution_time = Duration::from_millis(150);
949
950 let comparison = baseline.compare(¤t_data);
951 let summary = comparison.summary();
952 assert!(summary.contains("REGRESSION") || summary.contains("1."));
953 }
954
955 #[test]
956 fn test_bottleneck_analyzer_no_bottlenecks() {
957 let mut data = ProfileData::new();
958 data.record_op("op1", Duration::from_millis(5));
959 data.record_op("op2", Duration::from_millis(5));
960 data.total_execution_time = Duration::from_millis(100);
961
962 let report = BottleneckAnalyzer::analyze(&data);
963 assert!(report.bottlenecks.is_empty());
964 assert!(!report.has_bottlenecks());
965 }
966
967 #[test]
968 fn test_bottleneck_analyzer_with_bottleneck() {
969 let mut data = ProfileData::new();
970 data.record_op("slow_op", Duration::from_millis(50));
971 data.record_op("fast_op", Duration::from_millis(5));
972 data.total_execution_time = Duration::from_millis(100);
973
974 let report = BottleneckAnalyzer::analyze(&data);
975 assert!(!report.bottlenecks.is_empty());
976 assert_eq!(report.bottlenecks[0].op_type, "slow_op");
977 assert!(report.bottlenecks[0].percentage > 10.0);
978 }
979
980 #[test]
981 fn test_bottleneck_analyzer_memory_pressure() {
982 let mut data = ProfileData::new();
983 data.total_execution_time = Duration::from_millis(100);
984 data.memory.record_allocation(2_000_000_000); let report = BottleneckAnalyzer::analyze(&data);
987 assert!(report.memory_pressure_high);
988 assert!(report.has_bottlenecks());
989 }
990
991 #[test]
992 fn test_bottleneck_report_summary() {
993 let mut data = ProfileData::new();
994 data.record_op("slow_op", Duration::from_millis(60));
995 data.total_execution_time = Duration::from_millis(100);
996
997 let report = BottleneckAnalyzer::analyze(&data);
998 let summary = report.summary();
999 assert!(summary.contains("slow_op"));
1000 assert!(summary.contains("%"));
1001 }
1002
1003 #[test]
1004 fn test_bottleneck_report_summary_no_issues() {
1005 let report = BottleneckReport::new();
1006 let summary = report.summary();
1007 assert!(summary.contains("No significant bottlenecks"));
1008 }
1009}