Skip to main content

tensorlogic_infer/
profiling.rs

1//! Execution profiling and performance monitoring.
2
3use crate::context::{ExecutionHook, ExecutionPhase, ExecutionState};
4use std::cmp::Reverse;
5use std::collections::HashMap;
6use std::time::{Duration, Instant};
7
8/// Profiling statistics for a single operation
9#[derive(Debug, Clone)]
10pub struct OpProfile {
11    pub op_type: String,
12    pub count: usize,
13    pub total_time: Duration,
14    pub avg_time: Duration,
15    pub min_time: Duration,
16    pub max_time: Duration,
17}
18
19impl OpProfile {
20    pub fn new(op_type: impl Into<String>) -> Self {
21        OpProfile {
22            op_type: op_type.into(),
23            count: 0,
24            total_time: Duration::ZERO,
25            avg_time: Duration::ZERO,
26            min_time: Duration::MAX,
27            max_time: Duration::ZERO,
28        }
29    }
30
31    pub fn record(&mut self, duration: Duration) {
32        self.count += 1;
33        self.total_time += duration;
34        self.avg_time = self.total_time / self.count as u32;
35        self.min_time = self.min_time.min(duration);
36        self.max_time = self.max_time.max(duration);
37    }
38}
39
40/// Memory usage statistics
41#[derive(Debug, Clone, Default)]
42pub struct MemoryProfile {
43    pub peak_bytes: usize,
44    pub current_bytes: usize,
45    pub allocations: usize,
46    pub deallocations: usize,
47}
48
49impl MemoryProfile {
50    pub fn new() -> Self {
51        Self::default()
52    }
53
54    pub fn record_allocation(&mut self, bytes: usize) {
55        self.current_bytes += bytes;
56        self.peak_bytes = self.peak_bytes.max(self.current_bytes);
57        self.allocations += 1;
58    }
59
60    pub fn record_deallocation(&mut self, bytes: usize) {
61        self.current_bytes = self.current_bytes.saturating_sub(bytes);
62        self.deallocations += 1;
63    }
64}
65
66/// Comprehensive profiling data
67#[derive(Debug, Clone)]
68pub struct ProfileData {
69    pub op_profiles: HashMap<String, OpProfile>,
70    pub memory: MemoryProfile,
71    pub total_execution_time: Duration,
72}
73
74impl ProfileData {
75    pub fn new() -> Self {
76        ProfileData {
77            op_profiles: HashMap::new(),
78            memory: MemoryProfile::new(),
79            total_execution_time: Duration::ZERO,
80        }
81    }
82
83    pub fn record_op(&mut self, op_type: impl Into<String>, duration: Duration) {
84        let op_type = op_type.into();
85        self.op_profiles
86            .entry(op_type.clone())
87            .or_insert_with(|| OpProfile::new(op_type))
88            .record(duration);
89    }
90
91    pub fn get_op_profile(&self, op_type: &str) -> Option<&OpProfile> {
92        self.op_profiles.get(op_type)
93    }
94
95    /// Get the slowest operations
96    pub fn slowest_ops(&self, limit: usize) -> Vec<(&str, &OpProfile)> {
97        let mut ops: Vec<_> = self
98            .op_profiles
99            .iter()
100            .map(|(name, profile)| (name.as_str(), profile))
101            .collect();
102
103        ops.sort_by_key(|b| Reverse(b.1.total_time));
104        ops.truncate(limit);
105        ops
106    }
107
108    /// Generate a summary report
109    pub fn summary(&self) -> String {
110        let mut report = String::new();
111        report.push_str(&format!(
112            "Total Execution Time: {:.3}ms\n",
113            self.total_execution_time.as_secs_f64() * 1000.0
114        ));
115        report.push_str(&format!("Peak Memory: {} bytes\n", self.memory.peak_bytes));
116        report.push_str("\nTop Operations by Time:\n");
117
118        for (name, profile) in self.slowest_ops(5) {
119            report.push_str(&format!(
120                "  {}: {} calls, {:.3}ms total, {:.3}ms avg\n",
121                name,
122                profile.count,
123                profile.total_time.as_secs_f64() * 1000.0,
124                profile.avg_time.as_secs_f64() * 1000.0
125            ));
126        }
127
128        report
129    }
130}
131
132impl Default for ProfileData {
133    fn default() -> Self {
134        Self::new()
135    }
136}
137
138/// Profiler that tracks execution metrics
139pub struct Profiler {
140    data: ProfileData,
141    start_time: Option<Instant>,
142}
143
144impl Profiler {
145    pub fn new() -> Self {
146        Profiler {
147            data: ProfileData::new(),
148            start_time: None,
149        }
150    }
151
152    pub fn start(&mut self) {
153        self.start_time = Some(Instant::now());
154    }
155
156    pub fn stop(&mut self) {
157        if let Some(start) = self.start_time.take() {
158            self.data.total_execution_time = start.elapsed();
159        }
160    }
161
162    /// Time an operation and record its duration
163    pub fn time_op<F, R>(&mut self, op_type: impl Into<String>, f: F) -> R
164    where
165        F: FnOnce() -> R,
166    {
167        let start = Instant::now();
168        let result = f();
169        let duration = start.elapsed();
170        self.data.record_op(op_type, duration);
171        result
172    }
173
174    pub fn record_allocation(&mut self, bytes: usize) {
175        self.data.memory.record_allocation(bytes);
176    }
177
178    pub fn record_deallocation(&mut self, bytes: usize) {
179        self.data.memory.record_deallocation(bytes);
180    }
181
182    pub fn data(&self) -> &ProfileData {
183        &self.data
184    }
185
186    pub fn into_data(mut self) -> ProfileData {
187        self.stop();
188        self.data
189    }
190}
191
192impl Default for Profiler {
193    fn default() -> Self {
194        Self::new()
195    }
196}
197
198/// Extension trait for profiled execution
199pub trait TlProfiledExecutor {
200    /// Get the current profiler
201    fn profiler(&self) -> Option<&Profiler>;
202
203    /// Get mutable access to profiler
204    fn profiler_mut(&mut self) -> Option<&mut Profiler>;
205
206    /// Enable profiling
207    fn enable_profiling(&mut self);
208
209    /// Disable profiling
210    fn disable_profiling(&mut self);
211
212    /// Get profiling data
213    fn get_profile_data(&self) -> Option<&ProfileData> {
214        self.profiler().map(|p| p.data())
215    }
216}
217
218/// Profiler hook for integration with ExecutionContext
219pub struct ProfilerHook {
220    profiler: Profiler,
221    node_timings: HashMap<usize, Instant>,
222}
223
224impl ProfilerHook {
225    pub fn new() -> Self {
226        ProfilerHook {
227            profiler: Profiler::new(),
228            node_timings: HashMap::new(),
229        }
230    }
231
232    pub fn profiler(&self) -> &Profiler {
233        &self.profiler
234    }
235
236    pub fn into_profiler(self) -> Profiler {
237        self.profiler
238    }
239
240    pub fn into_data(self) -> ProfileData {
241        self.profiler.into_data()
242    }
243}
244
245impl Default for ProfilerHook {
246    fn default() -> Self {
247        Self::new()
248    }
249}
250
251impl ExecutionHook for ProfilerHook {
252    fn on_phase_change(&mut self, phase: ExecutionPhase, _state: &ExecutionState) {
253        match phase {
254            ExecutionPhase::Executing => {
255                self.profiler.start();
256            }
257            ExecutionPhase::Completed | ExecutionPhase::Failed | ExecutionPhase::Cancelled => {
258                self.profiler.stop();
259            }
260            _ => {}
261        }
262    }
263
264    fn on_node_start(&mut self, node_idx: usize, _state: &ExecutionState) {
265        self.node_timings.insert(node_idx, Instant::now());
266    }
267
268    fn on_node_complete(&mut self, node_idx: usize, _duration: Duration, state: &ExecutionState) {
269        if let Some(start_time) = self.node_timings.remove(&node_idx) {
270            let duration = start_time.elapsed();
271            // Record with node index as operation type
272            self.profiler
273                .data
274                .record_op(format!("node_{}", node_idx), duration);
275
276            // If we have access to the actual op type from state, use it
277            if node_idx < state.total_nodes {
278                self.profiler.data.record_op("graph_node", duration);
279            }
280        }
281    }
282
283    fn on_error(&mut self, _error: &str, _state: &ExecutionState) {
284        // Stop profiler on error
285        self.profiler.stop();
286    }
287
288    fn on_complete(&mut self, _state: &ExecutionState) {
289        // Stop profiler on completion
290        self.profiler.stop();
291    }
292}
293
294/// Node-level execution trace entry
295#[derive(Debug, Clone)]
296pub struct TraceEntry {
297    pub node_idx: usize,
298    pub start_time: Duration,
299    pub end_time: Duration,
300    pub duration: Duration,
301    pub op_type: String,
302}
303
304impl TraceEntry {
305    pub fn new(
306        node_idx: usize,
307        start_time: Duration,
308        end_time: Duration,
309        op_type: impl Into<String>,
310    ) -> Self {
311        let duration = end_time - start_time;
312        TraceEntry {
313            node_idx,
314            start_time,
315            end_time,
316            duration,
317            op_type: op_type.into(),
318        }
319    }
320}
321
322/// Detailed execution timeline profiler
323pub struct TimelineProfiler {
324    traces: Vec<TraceEntry>,
325    start_instant: Option<Instant>,
326    node_starts: HashMap<usize, (Instant, String)>,
327}
328
329impl TimelineProfiler {
330    pub fn new() -> Self {
331        TimelineProfiler {
332            traces: Vec::new(),
333            start_instant: None,
334            node_starts: HashMap::new(),
335        }
336    }
337
338    pub fn start(&mut self) {
339        self.start_instant = Some(Instant::now());
340    }
341
342    pub fn record_node_start(&mut self, node_idx: usize, op_type: impl Into<String>) {
343        if self.start_instant.is_some() {
344            self.node_starts
345                .insert(node_idx, (Instant::now(), op_type.into()));
346        }
347    }
348
349    pub fn record_node_end(&mut self, node_idx: usize) {
350        if let (Some(start_instant), Some((node_start, op_type))) =
351            (self.start_instant, self.node_starts.remove(&node_idx))
352        {
353            let now = Instant::now();
354            let start_time = node_start.duration_since(start_instant);
355            let end_time = now.duration_since(start_instant);
356
357            self.traces
358                .push(TraceEntry::new(node_idx, start_time, end_time, op_type));
359        }
360    }
361
362    pub fn traces(&self) -> &[TraceEntry] {
363        &self.traces
364    }
365
366    /// Get critical path (longest chain of dependent nodes)
367    pub fn critical_path_duration(&self) -> Duration {
368        self.traces.iter().map(|t| t.duration).sum()
369    }
370
371    /// Get timeline summary
372    pub fn summary(&self) -> String {
373        let mut report = String::new();
374        let total_duration = self.critical_path_duration();
375
376        report.push_str(&format!(
377            "Total Timeline Duration: {:.3}ms\n",
378            total_duration.as_secs_f64() * 1000.0
379        ));
380        report.push_str(&format!("Traced Nodes: {}\n\n", self.traces.len()));
381
382        report.push_str("Node Timeline:\n");
383        for trace in &self.traces {
384            report.push_str(&format!(
385                "  Node {}: {:.3}ms - {:.3}ms ({:.3}ms) - {}\n",
386                trace.node_idx,
387                trace.start_time.as_secs_f64() * 1000.0,
388                trace.end_time.as_secs_f64() * 1000.0,
389                trace.duration.as_secs_f64() * 1000.0,
390                trace.op_type
391            ));
392        }
393
394        report
395    }
396}
397
398impl Default for TimelineProfiler {
399    fn default() -> Self {
400        Self::new()
401    }
402}
403
404/// Statistical metrics for profiling data
405#[derive(Debug, Clone)]
406pub struct ProfileStatistics {
407    pub mean: f64,
408    pub median: f64,
409    pub p50: f64,
410    pub p90: f64,
411    pub p95: f64,
412    pub p99: f64,
413    pub std_dev: f64,
414}
415
416impl ProfileStatistics {
417    /// Calculate statistics from a set of durations
418    pub fn from_durations(durations: &mut [Duration]) -> Self {
419        if durations.is_empty() {
420            return ProfileStatistics {
421                mean: 0.0,
422                median: 0.0,
423                p50: 0.0,
424                p90: 0.0,
425                p95: 0.0,
426                p99: 0.0,
427                std_dev: 0.0,
428            };
429        }
430
431        durations.sort();
432
433        let values: Vec<f64> = durations.iter().map(|d| d.as_secs_f64()).collect();
434        let n = values.len();
435
436        // Mean
437        let mean = values.iter().sum::<f64>() / n as f64;
438
439        // Standard deviation
440        let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n as f64;
441        let std_dev = variance.sqrt();
442
443        // Percentiles
444        let percentile = |p: f64| -> f64 {
445            let index = ((n as f64 * p).ceil() as usize).saturating_sub(1);
446            values[index.min(n - 1)]
447        };
448
449        ProfileStatistics {
450            mean,
451            median: percentile(0.50),
452            p50: percentile(0.50),
453            p90: percentile(0.90),
454            p95: percentile(0.95),
455            p99: percentile(0.99),
456            std_dev,
457        }
458    }
459}
460
461/// Performance baseline for regression detection
462#[derive(Debug, Clone)]
463pub struct PerformanceBaseline {
464    pub op_baselines: HashMap<String, ProfileStatistics>,
465    pub total_time_baseline: Duration,
466}
467
468impl PerformanceBaseline {
469    pub fn new() -> Self {
470        PerformanceBaseline {
471            op_baselines: HashMap::new(),
472            total_time_baseline: Duration::ZERO,
473        }
474    }
475
476    pub fn from_profile_data(data: &ProfileData) -> Self {
477        let mut baseline = PerformanceBaseline::new();
478        baseline.total_time_baseline = data.total_execution_time;
479
480        for (op_type, profile) in &data.op_profiles {
481            // Create a simple baseline from existing data
482            let stats = ProfileStatistics {
483                mean: profile.avg_time.as_secs_f64(),
484                median: profile.avg_time.as_secs_f64(),
485                p50: profile.avg_time.as_secs_f64(),
486                p90: profile.max_time.as_secs_f64(),
487                p95: profile.max_time.as_secs_f64(),
488                p99: profile.max_time.as_secs_f64(),
489                std_dev: 0.0,
490            };
491            baseline.op_baselines.insert(op_type.clone(), stats);
492        }
493
494        baseline
495    }
496
497    /// Compare current data against baseline
498    pub fn compare(&self, data: &ProfileData) -> PerformanceComparison {
499        let mut comparison = PerformanceComparison::new();
500
501        // Compare total time
502        let total_time_ratio = if self.total_time_baseline.as_secs_f64() > 0.0 {
503            data.total_execution_time.as_secs_f64() / self.total_time_baseline.as_secs_f64()
504        } else {
505            1.0
506        };
507
508        comparison.total_time_ratio = total_time_ratio;
509        comparison.is_regression = total_time_ratio > 1.1; // 10% slower is regression
510
511        // Compare per-operation
512        for (op_type, profile) in &data.op_profiles {
513            if let Some(baseline_stats) = self.op_baselines.get(op_type) {
514                let current_mean = profile.avg_time.as_secs_f64();
515                let ratio = current_mean / baseline_stats.mean;
516                if ratio > 1.1 {
517                    comparison.slow_ops.push((
518                        op_type.clone(),
519                        ratio,
520                        current_mean - baseline_stats.mean,
521                    ));
522                }
523            }
524        }
525
526        comparison
527            .slow_ops
528            .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
529
530        comparison
531    }
532}
533
534impl Default for PerformanceBaseline {
535    fn default() -> Self {
536        Self::new()
537    }
538}
539
540/// Result of comparing performance against baseline
541#[derive(Debug, Clone)]
542pub struct PerformanceComparison {
543    pub is_regression: bool,
544    pub total_time_ratio: f64,
545    pub slow_ops: Vec<(String, f64, f64)>, // (op_type, ratio, absolute_diff)
546}
547
548impl PerformanceComparison {
549    pub fn new() -> Self {
550        PerformanceComparison {
551            is_regression: false,
552            total_time_ratio: 1.0,
553            slow_ops: Vec::new(),
554        }
555    }
556
557    pub fn summary(&self) -> String {
558        let mut report = String::new();
559
560        if self.is_regression {
561            report.push_str("⚠️  PERFORMANCE REGRESSION DETECTED\n\n");
562        } else {
563            report.push_str("✓ Performance within acceptable range\n\n");
564        }
565
566        report.push_str(&format!(
567            "Total Time Ratio: {:.2}x\n",
568            self.total_time_ratio
569        ));
570
571        if !self.slow_ops.is_empty() {
572            report.push_str("\nSlower Operations:\n");
573            for (op_type, ratio, diff) in &self.slow_ops {
574                report.push_str(&format!(
575                    "  {}: {:.2}x slower (+{:.3}ms)\n",
576                    op_type,
577                    ratio,
578                    diff * 1000.0
579                ));
580            }
581        }
582
583        report
584    }
585}
586
587impl Default for PerformanceComparison {
588    fn default() -> Self {
589        Self::new()
590    }
591}
592
593/// Bottleneck analyzer for identifying performance issues
594pub struct BottleneckAnalyzer;
595
596impl BottleneckAnalyzer {
597    /// Analyze profile data and identify bottlenecks
598    pub fn analyze(data: &ProfileData) -> BottleneckReport {
599        let mut report = BottleneckReport::new();
600
601        // Find operations taking > 10% of total time
602        let total_time = data.total_execution_time.as_secs_f64();
603        if total_time > 0.0 {
604            for (op_type, profile) in &data.op_profiles {
605                let op_time = profile.total_time.as_secs_f64();
606                let percentage = (op_time / total_time) * 100.0;
607
608                if percentage > 10.0 {
609                    report.bottlenecks.push(Bottleneck {
610                        op_type: op_type.clone(),
611                        percentage,
612                        total_time: profile.total_time,
613                        call_count: profile.count,
614                        avg_time: profile.avg_time,
615                    });
616                }
617            }
618        }
619
620        // Sort by percentage (highest first)
621        report.bottlenecks.sort_by(|a, b| {
622            b.percentage
623                .partial_cmp(&a.percentage)
624                .unwrap_or(std::cmp::Ordering::Equal)
625        });
626
627        // Calculate memory pressure
628        if data.memory.peak_bytes > 1_000_000_000 {
629            // > 1GB
630            report.memory_pressure_high = true;
631        }
632
633        report
634    }
635}
636
637/// Identified performance bottleneck
638#[derive(Debug, Clone)]
639pub struct Bottleneck {
640    pub op_type: String,
641    pub percentage: f64,
642    pub total_time: Duration,
643    pub call_count: usize,
644    pub avg_time: Duration,
645}
646
647/// Bottleneck analysis report
648#[derive(Debug, Clone)]
649pub struct BottleneckReport {
650    pub bottlenecks: Vec<Bottleneck>,
651    pub memory_pressure_high: bool,
652}
653
654impl BottleneckReport {
655    pub fn new() -> Self {
656        BottleneckReport {
657            bottlenecks: Vec::new(),
658            memory_pressure_high: false,
659        }
660    }
661
662    pub fn has_bottlenecks(&self) -> bool {
663        !self.bottlenecks.is_empty() || self.memory_pressure_high
664    }
665
666    pub fn summary(&self) -> String {
667        let mut report = String::new();
668
669        if self.bottlenecks.is_empty() && !self.memory_pressure_high {
670            report.push_str("No significant bottlenecks detected\n");
671            return report;
672        }
673
674        report.push_str("Performance Bottlenecks:\n\n");
675
676        for bottleneck in &self.bottlenecks {
677            report.push_str(&format!(
678                "• {} - {:.1}% of total time\n",
679                bottleneck.op_type, bottleneck.percentage
680            ));
681            report.push_str(&format!(
682                "  {} calls, {:.3}ms avg, {:.3}ms total\n",
683                bottleneck.call_count,
684                bottleneck.avg_time.as_secs_f64() * 1000.0,
685                bottleneck.total_time.as_secs_f64() * 1000.0
686            ));
687        }
688
689        if self.memory_pressure_high {
690            report.push_str("\n⚠️  High memory pressure detected (>1GB peak)\n");
691        }
692
693        report
694    }
695}
696
697impl Default for BottleneckReport {
698    fn default() -> Self {
699        Self::new()
700    }
701}
702
703#[cfg(test)]
704mod tests {
705    use super::*;
706    use std::thread;
707
708    #[test]
709    fn test_op_profile() {
710        let mut profile = OpProfile::new("einsum");
711        assert_eq!(profile.count, 0);
712
713        profile.record(Duration::from_millis(10));
714        assert_eq!(profile.count, 1);
715        assert!(profile.total_time >= Duration::from_millis(10));
716
717        profile.record(Duration::from_millis(20));
718        assert_eq!(profile.count, 2);
719    }
720
721    #[test]
722    fn test_memory_profile() {
723        let mut mem = MemoryProfile::new();
724        assert_eq!(mem.current_bytes, 0);
725        assert_eq!(mem.peak_bytes, 0);
726
727        mem.record_allocation(1000);
728        assert_eq!(mem.current_bytes, 1000);
729        assert_eq!(mem.peak_bytes, 1000);
730
731        mem.record_allocation(500);
732        assert_eq!(mem.current_bytes, 1500);
733        assert_eq!(mem.peak_bytes, 1500);
734
735        mem.record_deallocation(500);
736        assert_eq!(mem.current_bytes, 1000);
737        assert_eq!(mem.peak_bytes, 1500); // Peak stays at max
738    }
739
740    #[test]
741    fn test_profile_data() {
742        let mut data = ProfileData::new();
743
744        data.record_op("einsum", Duration::from_millis(100));
745        data.record_op("relu", Duration::from_millis(50));
746        data.record_op("einsum", Duration::from_millis(80));
747
748        let einsum_profile = data.get_op_profile("einsum").expect("unwrap");
749        assert_eq!(einsum_profile.count, 2);
750
751        let slowest = data.slowest_ops(2);
752        assert_eq!(slowest.len(), 2);
753        assert_eq!(slowest[0].0, "einsum"); // Should be slowest
754    }
755
756    #[test]
757    fn test_profiler() {
758        let mut profiler = Profiler::new();
759
760        profiler.start();
761
762        // Simulate some work
763        profiler.time_op("operation1", || {
764            thread::sleep(Duration::from_millis(1));
765        });
766
767        profiler.time_op("operation2", || {
768            thread::sleep(Duration::from_millis(2));
769        });
770
771        profiler.stop();
772
773        let data = profiler.data();
774        assert_eq!(data.op_profiles.len(), 2);
775        assert!(data.total_execution_time >= Duration::from_millis(3));
776    }
777
778    #[test]
779    fn test_profile_summary() {
780        let mut data = ProfileData::new();
781        data.record_op("einsum", Duration::from_millis(100));
782        data.record_op("relu", Duration::from_millis(50));
783        data.total_execution_time = Duration::from_millis(150);
784        data.memory.record_allocation(1024);
785
786        let summary = data.summary();
787        assert!(summary.contains("150"));
788        assert!(summary.contains("1024"));
789        assert!(summary.contains("einsum"));
790    }
791
792    #[test]
793    fn test_profiler_hook_creation() {
794        let hook = ProfilerHook::new();
795        assert_eq!(hook.profiler().data().op_profiles.len(), 0);
796    }
797
798    #[test]
799    fn test_profiler_hook_with_execution() {
800        use crate::context::ExecutionState;
801
802        let mut hook = ProfilerHook::new();
803        let state = ExecutionState::new(5);
804
805        // Simulate execution flow
806        hook.on_phase_change(ExecutionPhase::Executing, &state);
807        hook.on_node_start(0, &state);
808        thread::sleep(Duration::from_millis(1));
809        hook.on_node_complete(0, Duration::from_millis(1), &state);
810        hook.on_phase_change(ExecutionPhase::Completed, &state);
811
812        let data = hook.profiler().data();
813        assert!(!data.op_profiles.is_empty());
814        assert!(data.total_execution_time > Duration::ZERO);
815    }
816
817    #[test]
818    fn test_timeline_profiler() {
819        let mut profiler = TimelineProfiler::new();
820        profiler.start();
821
822        profiler.record_node_start(0, "einsum");
823        thread::sleep(Duration::from_millis(1));
824        profiler.record_node_end(0);
825
826        profiler.record_node_start(1, "relu");
827        thread::sleep(Duration::from_millis(1));
828        profiler.record_node_end(1);
829
830        let traces = profiler.traces();
831        assert_eq!(traces.len(), 2);
832        assert_eq!(traces[0].node_idx, 0);
833        assert_eq!(traces[1].node_idx, 1);
834    }
835
836    #[test]
837    fn test_timeline_summary() {
838        let mut profiler = TimelineProfiler::new();
839        profiler.start();
840
841        profiler.record_node_start(0, "einsum");
842        thread::sleep(Duration::from_millis(1));
843        profiler.record_node_end(0);
844
845        let summary = profiler.summary();
846        assert!(summary.contains("Node 0"));
847        assert!(summary.contains("einsum"));
848    }
849
850    #[test]
851    fn test_trace_entry() {
852        let entry = TraceEntry::new(
853            0,
854            Duration::from_millis(0),
855            Duration::from_millis(100),
856            "einsum",
857        );
858
859        assert_eq!(entry.node_idx, 0);
860        assert_eq!(entry.duration, Duration::from_millis(100));
861        assert_eq!(entry.op_type, "einsum");
862    }
863
864    #[test]
865    fn test_profile_statistics_empty() {
866        let stats = ProfileStatistics::from_durations(&mut []);
867        assert_eq!(stats.mean, 0.0);
868        assert_eq!(stats.median, 0.0);
869    }
870
871    #[test]
872    fn test_profile_statistics() {
873        let mut durations = vec![
874            Duration::from_millis(10),
875            Duration::from_millis(20),
876            Duration::from_millis(30),
877            Duration::from_millis(40),
878            Duration::from_millis(50),
879        ];
880
881        let stats = ProfileStatistics::from_durations(&mut durations);
882        assert!(stats.mean > 0.0);
883        assert!(stats.p50 > 0.0);
884        assert!(stats.p90 > 0.0);
885        assert!(stats.p95 > 0.0);
886        assert!(stats.p99 > 0.0);
887        assert!(stats.p99 >= stats.p95);
888        assert!(stats.p95 >= stats.p90);
889        assert!(stats.p90 >= stats.p50);
890    }
891
892    #[test]
893    fn test_performance_baseline_creation() {
894        let mut data = ProfileData::new();
895        data.record_op("einsum", Duration::from_millis(100));
896        data.record_op("relu", Duration::from_millis(50));
897        data.total_execution_time = Duration::from_millis(150);
898
899        let baseline = PerformanceBaseline::from_profile_data(&data);
900        assert_eq!(baseline.op_baselines.len(), 2);
901        assert!(baseline.op_baselines.contains_key("einsum"));
902        assert!(baseline.op_baselines.contains_key("relu"));
903    }
904
905    #[test]
906    fn test_performance_comparison_no_regression() {
907        let mut baseline_data = ProfileData::new();
908        baseline_data.record_op("einsum", Duration::from_millis(100));
909        baseline_data.total_execution_time = Duration::from_millis(100);
910
911        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
912
913        let mut current_data = ProfileData::new();
914        current_data.record_op("einsum", Duration::from_millis(105)); // 5% slower
915        current_data.total_execution_time = Duration::from_millis(105);
916
917        let comparison = baseline.compare(&current_data);
918        assert!(!comparison.is_regression); // Within 10% threshold
919    }
920
921    #[test]
922    fn test_performance_comparison_with_regression() {
923        let mut baseline_data = ProfileData::new();
924        baseline_data.record_op("einsum", Duration::from_millis(100));
925        baseline_data.total_execution_time = Duration::from_millis(100);
926
927        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
928
929        let mut current_data = ProfileData::new();
930        current_data.record_op("einsum", Duration::from_millis(120)); // 20% slower
931        current_data.total_execution_time = Duration::from_millis(120);
932
933        let comparison = baseline.compare(&current_data);
934        assert!(comparison.is_regression);
935        assert!(comparison.total_time_ratio > 1.1);
936    }
937
938    #[test]
939    fn test_performance_comparison_summary() {
940        let mut baseline_data = ProfileData::new();
941        baseline_data.record_op("einsum", Duration::from_millis(100));
942        baseline_data.total_execution_time = Duration::from_millis(100);
943
944        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
945
946        let mut current_data = ProfileData::new();
947        current_data.record_op("einsum", Duration::from_millis(150));
948        current_data.total_execution_time = Duration::from_millis(150);
949
950        let comparison = baseline.compare(&current_data);
951        let summary = comparison.summary();
952        assert!(summary.contains("REGRESSION") || summary.contains("1."));
953    }
954
955    #[test]
956    fn test_bottleneck_analyzer_no_bottlenecks() {
957        let mut data = ProfileData::new();
958        data.record_op("op1", Duration::from_millis(5));
959        data.record_op("op2", Duration::from_millis(5));
960        data.total_execution_time = Duration::from_millis(100);
961
962        let report = BottleneckAnalyzer::analyze(&data);
963        assert!(report.bottlenecks.is_empty());
964        assert!(!report.has_bottlenecks());
965    }
966
967    #[test]
968    fn test_bottleneck_analyzer_with_bottleneck() {
969        let mut data = ProfileData::new();
970        data.record_op("slow_op", Duration::from_millis(50));
971        data.record_op("fast_op", Duration::from_millis(5));
972        data.total_execution_time = Duration::from_millis(100);
973
974        let report = BottleneckAnalyzer::analyze(&data);
975        assert!(!report.bottlenecks.is_empty());
976        assert_eq!(report.bottlenecks[0].op_type, "slow_op");
977        assert!(report.bottlenecks[0].percentage > 10.0);
978    }
979
980    #[test]
981    fn test_bottleneck_analyzer_memory_pressure() {
982        let mut data = ProfileData::new();
983        data.total_execution_time = Duration::from_millis(100);
984        data.memory.record_allocation(2_000_000_000); // 2GB
985
986        let report = BottleneckAnalyzer::analyze(&data);
987        assert!(report.memory_pressure_high);
988        assert!(report.has_bottlenecks());
989    }
990
991    #[test]
992    fn test_bottleneck_report_summary() {
993        let mut data = ProfileData::new();
994        data.record_op("slow_op", Duration::from_millis(60));
995        data.total_execution_time = Duration::from_millis(100);
996
997        let report = BottleneckAnalyzer::analyze(&data);
998        let summary = report.summary();
999        assert!(summary.contains("slow_op"));
1000        assert!(summary.contains("%"));
1001    }
1002
1003    #[test]
1004    fn test_bottleneck_report_summary_no_issues() {
1005        let report = BottleneckReport::new();
1006        let summary = report.summary();
1007        assert!(summary.contains("No significant bottlenecks"));
1008    }
1009}