Skip to main content

tensorlogic_infer/
profiling.rs

1//! Execution profiling and performance monitoring.
2
3use crate::context::{ExecutionHook, ExecutionPhase, ExecutionState};
4use std::collections::HashMap;
5use std::time::{Duration, Instant};
6
7/// Profiling statistics for a single operation
8#[derive(Debug, Clone)]
9pub struct OpProfile {
10    pub op_type: String,
11    pub count: usize,
12    pub total_time: Duration,
13    pub avg_time: Duration,
14    pub min_time: Duration,
15    pub max_time: Duration,
16}
17
18impl OpProfile {
19    pub fn new(op_type: impl Into<String>) -> Self {
20        OpProfile {
21            op_type: op_type.into(),
22            count: 0,
23            total_time: Duration::ZERO,
24            avg_time: Duration::ZERO,
25            min_time: Duration::MAX,
26            max_time: Duration::ZERO,
27        }
28    }
29
30    pub fn record(&mut self, duration: Duration) {
31        self.count += 1;
32        self.total_time += duration;
33        self.avg_time = self.total_time / self.count as u32;
34        self.min_time = self.min_time.min(duration);
35        self.max_time = self.max_time.max(duration);
36    }
37}
38
39/// Memory usage statistics
40#[derive(Debug, Clone, Default)]
41pub struct MemoryProfile {
42    pub peak_bytes: usize,
43    pub current_bytes: usize,
44    pub allocations: usize,
45    pub deallocations: usize,
46}
47
48impl MemoryProfile {
49    pub fn new() -> Self {
50        Self::default()
51    }
52
53    pub fn record_allocation(&mut self, bytes: usize) {
54        self.current_bytes += bytes;
55        self.peak_bytes = self.peak_bytes.max(self.current_bytes);
56        self.allocations += 1;
57    }
58
59    pub fn record_deallocation(&mut self, bytes: usize) {
60        self.current_bytes = self.current_bytes.saturating_sub(bytes);
61        self.deallocations += 1;
62    }
63}
64
65/// Comprehensive profiling data
66#[derive(Debug, Clone)]
67pub struct ProfileData {
68    pub op_profiles: HashMap<String, OpProfile>,
69    pub memory: MemoryProfile,
70    pub total_execution_time: Duration,
71}
72
73impl ProfileData {
74    pub fn new() -> Self {
75        ProfileData {
76            op_profiles: HashMap::new(),
77            memory: MemoryProfile::new(),
78            total_execution_time: Duration::ZERO,
79        }
80    }
81
82    pub fn record_op(&mut self, op_type: impl Into<String>, duration: Duration) {
83        let op_type = op_type.into();
84        self.op_profiles
85            .entry(op_type.clone())
86            .or_insert_with(|| OpProfile::new(op_type))
87            .record(duration);
88    }
89
90    pub fn get_op_profile(&self, op_type: &str) -> Option<&OpProfile> {
91        self.op_profiles.get(op_type)
92    }
93
94    /// Get the slowest operations
95    pub fn slowest_ops(&self, limit: usize) -> Vec<(&str, &OpProfile)> {
96        let mut ops: Vec<_> = self
97            .op_profiles
98            .iter()
99            .map(|(name, profile)| (name.as_str(), profile))
100            .collect();
101
102        ops.sort_by(|a, b| b.1.total_time.cmp(&a.1.total_time));
103        ops.truncate(limit);
104        ops
105    }
106
107    /// Generate a summary report
108    pub fn summary(&self) -> String {
109        let mut report = String::new();
110        report.push_str(&format!(
111            "Total Execution Time: {:.3}ms\n",
112            self.total_execution_time.as_secs_f64() * 1000.0
113        ));
114        report.push_str(&format!("Peak Memory: {} bytes\n", self.memory.peak_bytes));
115        report.push_str("\nTop Operations by Time:\n");
116
117        for (name, profile) in self.slowest_ops(5) {
118            report.push_str(&format!(
119                "  {}: {} calls, {:.3}ms total, {:.3}ms avg\n",
120                name,
121                profile.count,
122                profile.total_time.as_secs_f64() * 1000.0,
123                profile.avg_time.as_secs_f64() * 1000.0
124            ));
125        }
126
127        report
128    }
129}
130
131impl Default for ProfileData {
132    fn default() -> Self {
133        Self::new()
134    }
135}
136
137/// Profiler that tracks execution metrics
138pub struct Profiler {
139    data: ProfileData,
140    start_time: Option<Instant>,
141}
142
143impl Profiler {
144    pub fn new() -> Self {
145        Profiler {
146            data: ProfileData::new(),
147            start_time: None,
148        }
149    }
150
151    pub fn start(&mut self) {
152        self.start_time = Some(Instant::now());
153    }
154
155    pub fn stop(&mut self) {
156        if let Some(start) = self.start_time.take() {
157            self.data.total_execution_time = start.elapsed();
158        }
159    }
160
161    /// Time an operation and record its duration
162    pub fn time_op<F, R>(&mut self, op_type: impl Into<String>, f: F) -> R
163    where
164        F: FnOnce() -> R,
165    {
166        let start = Instant::now();
167        let result = f();
168        let duration = start.elapsed();
169        self.data.record_op(op_type, duration);
170        result
171    }
172
173    pub fn record_allocation(&mut self, bytes: usize) {
174        self.data.memory.record_allocation(bytes);
175    }
176
177    pub fn record_deallocation(&mut self, bytes: usize) {
178        self.data.memory.record_deallocation(bytes);
179    }
180
181    pub fn data(&self) -> &ProfileData {
182        &self.data
183    }
184
185    pub fn into_data(mut self) -> ProfileData {
186        self.stop();
187        self.data
188    }
189}
190
191impl Default for Profiler {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
197/// Extension trait for profiled execution
198pub trait TlProfiledExecutor {
199    /// Get the current profiler
200    fn profiler(&self) -> Option<&Profiler>;
201
202    /// Get mutable access to profiler
203    fn profiler_mut(&mut self) -> Option<&mut Profiler>;
204
205    /// Enable profiling
206    fn enable_profiling(&mut self);
207
208    /// Disable profiling
209    fn disable_profiling(&mut self);
210
211    /// Get profiling data
212    fn get_profile_data(&self) -> Option<&ProfileData> {
213        self.profiler().map(|p| p.data())
214    }
215}
216
217/// Profiler hook for integration with ExecutionContext
218pub struct ProfilerHook {
219    profiler: Profiler,
220    node_timings: HashMap<usize, Instant>,
221}
222
223impl ProfilerHook {
224    pub fn new() -> Self {
225        ProfilerHook {
226            profiler: Profiler::new(),
227            node_timings: HashMap::new(),
228        }
229    }
230
231    pub fn profiler(&self) -> &Profiler {
232        &self.profiler
233    }
234
235    pub fn into_profiler(self) -> Profiler {
236        self.profiler
237    }
238
239    pub fn into_data(self) -> ProfileData {
240        self.profiler.into_data()
241    }
242}
243
244impl Default for ProfilerHook {
245    fn default() -> Self {
246        Self::new()
247    }
248}
249
250impl ExecutionHook for ProfilerHook {
251    fn on_phase_change(&mut self, phase: ExecutionPhase, _state: &ExecutionState) {
252        match phase {
253            ExecutionPhase::Executing => {
254                self.profiler.start();
255            }
256            ExecutionPhase::Completed | ExecutionPhase::Failed | ExecutionPhase::Cancelled => {
257                self.profiler.stop();
258            }
259            _ => {}
260        }
261    }
262
263    fn on_node_start(&mut self, node_idx: usize, _state: &ExecutionState) {
264        self.node_timings.insert(node_idx, Instant::now());
265    }
266
267    fn on_node_complete(&mut self, node_idx: usize, _duration: Duration, state: &ExecutionState) {
268        if let Some(start_time) = self.node_timings.remove(&node_idx) {
269            let duration = start_time.elapsed();
270            // Record with node index as operation type
271            self.profiler
272                .data
273                .record_op(format!("node_{}", node_idx), duration);
274
275            // If we have access to the actual op type from state, use it
276            if node_idx < state.total_nodes {
277                self.profiler.data.record_op("graph_node", duration);
278            }
279        }
280    }
281
282    fn on_error(&mut self, _error: &str, _state: &ExecutionState) {
283        // Stop profiler on error
284        self.profiler.stop();
285    }
286
287    fn on_complete(&mut self, _state: &ExecutionState) {
288        // Stop profiler on completion
289        self.profiler.stop();
290    }
291}
292
293/// Node-level execution trace entry
294#[derive(Debug, Clone)]
295pub struct TraceEntry {
296    pub node_idx: usize,
297    pub start_time: Duration,
298    pub end_time: Duration,
299    pub duration: Duration,
300    pub op_type: String,
301}
302
303impl TraceEntry {
304    pub fn new(
305        node_idx: usize,
306        start_time: Duration,
307        end_time: Duration,
308        op_type: impl Into<String>,
309    ) -> Self {
310        let duration = end_time - start_time;
311        TraceEntry {
312            node_idx,
313            start_time,
314            end_time,
315            duration,
316            op_type: op_type.into(),
317        }
318    }
319}
320
321/// Detailed execution timeline profiler
322pub struct TimelineProfiler {
323    traces: Vec<TraceEntry>,
324    start_instant: Option<Instant>,
325    node_starts: HashMap<usize, (Instant, String)>,
326}
327
328impl TimelineProfiler {
329    pub fn new() -> Self {
330        TimelineProfiler {
331            traces: Vec::new(),
332            start_instant: None,
333            node_starts: HashMap::new(),
334        }
335    }
336
337    pub fn start(&mut self) {
338        self.start_instant = Some(Instant::now());
339    }
340
341    pub fn record_node_start(&mut self, node_idx: usize, op_type: impl Into<String>) {
342        if self.start_instant.is_some() {
343            self.node_starts
344                .insert(node_idx, (Instant::now(), op_type.into()));
345        }
346    }
347
348    pub fn record_node_end(&mut self, node_idx: usize) {
349        if let (Some(start_instant), Some((node_start, op_type))) =
350            (self.start_instant, self.node_starts.remove(&node_idx))
351        {
352            let now = Instant::now();
353            let start_time = node_start.duration_since(start_instant);
354            let end_time = now.duration_since(start_instant);
355
356            self.traces
357                .push(TraceEntry::new(node_idx, start_time, end_time, op_type));
358        }
359    }
360
361    pub fn traces(&self) -> &[TraceEntry] {
362        &self.traces
363    }
364
365    /// Get critical path (longest chain of dependent nodes)
366    pub fn critical_path_duration(&self) -> Duration {
367        self.traces.iter().map(|t| t.duration).sum()
368    }
369
370    /// Get timeline summary
371    pub fn summary(&self) -> String {
372        let mut report = String::new();
373        let total_duration = self.critical_path_duration();
374
375        report.push_str(&format!(
376            "Total Timeline Duration: {:.3}ms\n",
377            total_duration.as_secs_f64() * 1000.0
378        ));
379        report.push_str(&format!("Traced Nodes: {}\n\n", self.traces.len()));
380
381        report.push_str("Node Timeline:\n");
382        for trace in &self.traces {
383            report.push_str(&format!(
384                "  Node {}: {:.3}ms - {:.3}ms ({:.3}ms) - {}\n",
385                trace.node_idx,
386                trace.start_time.as_secs_f64() * 1000.0,
387                trace.end_time.as_secs_f64() * 1000.0,
388                trace.duration.as_secs_f64() * 1000.0,
389                trace.op_type
390            ));
391        }
392
393        report
394    }
395}
396
397impl Default for TimelineProfiler {
398    fn default() -> Self {
399        Self::new()
400    }
401}
402
403/// Statistical metrics for profiling data
404#[derive(Debug, Clone)]
405pub struct ProfileStatistics {
406    pub mean: f64,
407    pub median: f64,
408    pub p50: f64,
409    pub p90: f64,
410    pub p95: f64,
411    pub p99: f64,
412    pub std_dev: f64,
413}
414
415impl ProfileStatistics {
416    /// Calculate statistics from a set of durations
417    pub fn from_durations(durations: &mut [Duration]) -> Self {
418        if durations.is_empty() {
419            return ProfileStatistics {
420                mean: 0.0,
421                median: 0.0,
422                p50: 0.0,
423                p90: 0.0,
424                p95: 0.0,
425                p99: 0.0,
426                std_dev: 0.0,
427            };
428        }
429
430        durations.sort();
431
432        let values: Vec<f64> = durations.iter().map(|d| d.as_secs_f64()).collect();
433        let n = values.len();
434
435        // Mean
436        let mean = values.iter().sum::<f64>() / n as f64;
437
438        // Standard deviation
439        let variance = values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / n as f64;
440        let std_dev = variance.sqrt();
441
442        // Percentiles
443        let percentile = |p: f64| -> f64 {
444            let index = ((n as f64 * p).ceil() as usize).saturating_sub(1);
445            values[index.min(n - 1)]
446        };
447
448        ProfileStatistics {
449            mean,
450            median: percentile(0.50),
451            p50: percentile(0.50),
452            p90: percentile(0.90),
453            p95: percentile(0.95),
454            p99: percentile(0.99),
455            std_dev,
456        }
457    }
458}
459
460/// Performance baseline for regression detection
461#[derive(Debug, Clone)]
462pub struct PerformanceBaseline {
463    pub op_baselines: HashMap<String, ProfileStatistics>,
464    pub total_time_baseline: Duration,
465}
466
467impl PerformanceBaseline {
468    pub fn new() -> Self {
469        PerformanceBaseline {
470            op_baselines: HashMap::new(),
471            total_time_baseline: Duration::ZERO,
472        }
473    }
474
475    pub fn from_profile_data(data: &ProfileData) -> Self {
476        let mut baseline = PerformanceBaseline::new();
477        baseline.total_time_baseline = data.total_execution_time;
478
479        for (op_type, profile) in &data.op_profiles {
480            // Create a simple baseline from existing data
481            let stats = ProfileStatistics {
482                mean: profile.avg_time.as_secs_f64(),
483                median: profile.avg_time.as_secs_f64(),
484                p50: profile.avg_time.as_secs_f64(),
485                p90: profile.max_time.as_secs_f64(),
486                p95: profile.max_time.as_secs_f64(),
487                p99: profile.max_time.as_secs_f64(),
488                std_dev: 0.0,
489            };
490            baseline.op_baselines.insert(op_type.clone(), stats);
491        }
492
493        baseline
494    }
495
496    /// Compare current data against baseline
497    pub fn compare(&self, data: &ProfileData) -> PerformanceComparison {
498        let mut comparison = PerformanceComparison::new();
499
500        // Compare total time
501        let total_time_ratio = if self.total_time_baseline.as_secs_f64() > 0.0 {
502            data.total_execution_time.as_secs_f64() / self.total_time_baseline.as_secs_f64()
503        } else {
504            1.0
505        };
506
507        comparison.total_time_ratio = total_time_ratio;
508        comparison.is_regression = total_time_ratio > 1.1; // 10% slower is regression
509
510        // Compare per-operation
511        for (op_type, profile) in &data.op_profiles {
512            if let Some(baseline_stats) = self.op_baselines.get(op_type) {
513                let current_mean = profile.avg_time.as_secs_f64();
514                let ratio = current_mean / baseline_stats.mean;
515                if ratio > 1.1 {
516                    comparison.slow_ops.push((
517                        op_type.clone(),
518                        ratio,
519                        current_mean - baseline_stats.mean,
520                    ));
521                }
522            }
523        }
524
525        comparison
526            .slow_ops
527            .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
528
529        comparison
530    }
531}
532
533impl Default for PerformanceBaseline {
534    fn default() -> Self {
535        Self::new()
536    }
537}
538
539/// Result of comparing performance against baseline
540#[derive(Debug, Clone)]
541pub struct PerformanceComparison {
542    pub is_regression: bool,
543    pub total_time_ratio: f64,
544    pub slow_ops: Vec<(String, f64, f64)>, // (op_type, ratio, absolute_diff)
545}
546
547impl PerformanceComparison {
548    pub fn new() -> Self {
549        PerformanceComparison {
550            is_regression: false,
551            total_time_ratio: 1.0,
552            slow_ops: Vec::new(),
553        }
554    }
555
556    pub fn summary(&self) -> String {
557        let mut report = String::new();
558
559        if self.is_regression {
560            report.push_str("⚠️  PERFORMANCE REGRESSION DETECTED\n\n");
561        } else {
562            report.push_str("✓ Performance within acceptable range\n\n");
563        }
564
565        report.push_str(&format!(
566            "Total Time Ratio: {:.2}x\n",
567            self.total_time_ratio
568        ));
569
570        if !self.slow_ops.is_empty() {
571            report.push_str("\nSlower Operations:\n");
572            for (op_type, ratio, diff) in &self.slow_ops {
573                report.push_str(&format!(
574                    "  {}: {:.2}x slower (+{:.3}ms)\n",
575                    op_type,
576                    ratio,
577                    diff * 1000.0
578                ));
579            }
580        }
581
582        report
583    }
584}
585
586impl Default for PerformanceComparison {
587    fn default() -> Self {
588        Self::new()
589    }
590}
591
592/// Bottleneck analyzer for identifying performance issues
593pub struct BottleneckAnalyzer;
594
595impl BottleneckAnalyzer {
596    /// Analyze profile data and identify bottlenecks
597    pub fn analyze(data: &ProfileData) -> BottleneckReport {
598        let mut report = BottleneckReport::new();
599
600        // Find operations taking > 10% of total time
601        let total_time = data.total_execution_time.as_secs_f64();
602        if total_time > 0.0 {
603            for (op_type, profile) in &data.op_profiles {
604                let op_time = profile.total_time.as_secs_f64();
605                let percentage = (op_time / total_time) * 100.0;
606
607                if percentage > 10.0 {
608                    report.bottlenecks.push(Bottleneck {
609                        op_type: op_type.clone(),
610                        percentage,
611                        total_time: profile.total_time,
612                        call_count: profile.count,
613                        avg_time: profile.avg_time,
614                    });
615                }
616            }
617        }
618
619        // Sort by percentage (highest first)
620        report
621            .bottlenecks
622            .sort_by(|a, b| b.percentage.partial_cmp(&a.percentage).unwrap());
623
624        // Calculate memory pressure
625        if data.memory.peak_bytes > 1_000_000_000 {
626            // > 1GB
627            report.memory_pressure_high = true;
628        }
629
630        report
631    }
632}
633
634/// Identified performance bottleneck
635#[derive(Debug, Clone)]
636pub struct Bottleneck {
637    pub op_type: String,
638    pub percentage: f64,
639    pub total_time: Duration,
640    pub call_count: usize,
641    pub avg_time: Duration,
642}
643
644/// Bottleneck analysis report
645#[derive(Debug, Clone)]
646pub struct BottleneckReport {
647    pub bottlenecks: Vec<Bottleneck>,
648    pub memory_pressure_high: bool,
649}
650
651impl BottleneckReport {
652    pub fn new() -> Self {
653        BottleneckReport {
654            bottlenecks: Vec::new(),
655            memory_pressure_high: false,
656        }
657    }
658
659    pub fn has_bottlenecks(&self) -> bool {
660        !self.bottlenecks.is_empty() || self.memory_pressure_high
661    }
662
663    pub fn summary(&self) -> String {
664        let mut report = String::new();
665
666        if self.bottlenecks.is_empty() && !self.memory_pressure_high {
667            report.push_str("No significant bottlenecks detected\n");
668            return report;
669        }
670
671        report.push_str("Performance Bottlenecks:\n\n");
672
673        for bottleneck in &self.bottlenecks {
674            report.push_str(&format!(
675                "• {} - {:.1}% of total time\n",
676                bottleneck.op_type, bottleneck.percentage
677            ));
678            report.push_str(&format!(
679                "  {} calls, {:.3}ms avg, {:.3}ms total\n",
680                bottleneck.call_count,
681                bottleneck.avg_time.as_secs_f64() * 1000.0,
682                bottleneck.total_time.as_secs_f64() * 1000.0
683            ));
684        }
685
686        if self.memory_pressure_high {
687            report.push_str("\n⚠️  High memory pressure detected (>1GB peak)\n");
688        }
689
690        report
691    }
692}
693
694impl Default for BottleneckReport {
695    fn default() -> Self {
696        Self::new()
697    }
698}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703    use std::thread;
704
705    #[test]
706    fn test_op_profile() {
707        let mut profile = OpProfile::new("einsum");
708        assert_eq!(profile.count, 0);
709
710        profile.record(Duration::from_millis(10));
711        assert_eq!(profile.count, 1);
712        assert!(profile.total_time >= Duration::from_millis(10));
713
714        profile.record(Duration::from_millis(20));
715        assert_eq!(profile.count, 2);
716    }
717
718    #[test]
719    fn test_memory_profile() {
720        let mut mem = MemoryProfile::new();
721        assert_eq!(mem.current_bytes, 0);
722        assert_eq!(mem.peak_bytes, 0);
723
724        mem.record_allocation(1000);
725        assert_eq!(mem.current_bytes, 1000);
726        assert_eq!(mem.peak_bytes, 1000);
727
728        mem.record_allocation(500);
729        assert_eq!(mem.current_bytes, 1500);
730        assert_eq!(mem.peak_bytes, 1500);
731
732        mem.record_deallocation(500);
733        assert_eq!(mem.current_bytes, 1000);
734        assert_eq!(mem.peak_bytes, 1500); // Peak stays at max
735    }
736
737    #[test]
738    fn test_profile_data() {
739        let mut data = ProfileData::new();
740
741        data.record_op("einsum", Duration::from_millis(100));
742        data.record_op("relu", Duration::from_millis(50));
743        data.record_op("einsum", Duration::from_millis(80));
744
745        let einsum_profile = data.get_op_profile("einsum").unwrap();
746        assert_eq!(einsum_profile.count, 2);
747
748        let slowest = data.slowest_ops(2);
749        assert_eq!(slowest.len(), 2);
750        assert_eq!(slowest[0].0, "einsum"); // Should be slowest
751    }
752
753    #[test]
754    fn test_profiler() {
755        let mut profiler = Profiler::new();
756
757        profiler.start();
758
759        // Simulate some work
760        profiler.time_op("operation1", || {
761            thread::sleep(Duration::from_millis(1));
762        });
763
764        profiler.time_op("operation2", || {
765            thread::sleep(Duration::from_millis(2));
766        });
767
768        profiler.stop();
769
770        let data = profiler.data();
771        assert_eq!(data.op_profiles.len(), 2);
772        assert!(data.total_execution_time >= Duration::from_millis(3));
773    }
774
775    #[test]
776    fn test_profile_summary() {
777        let mut data = ProfileData::new();
778        data.record_op("einsum", Duration::from_millis(100));
779        data.record_op("relu", Duration::from_millis(50));
780        data.total_execution_time = Duration::from_millis(150);
781        data.memory.record_allocation(1024);
782
783        let summary = data.summary();
784        assert!(summary.contains("150"));
785        assert!(summary.contains("1024"));
786        assert!(summary.contains("einsum"));
787    }
788
789    #[test]
790    fn test_profiler_hook_creation() {
791        let hook = ProfilerHook::new();
792        assert_eq!(hook.profiler().data().op_profiles.len(), 0);
793    }
794
795    #[test]
796    fn test_profiler_hook_with_execution() {
797        use crate::context::ExecutionState;
798
799        let mut hook = ProfilerHook::new();
800        let state = ExecutionState::new(5);
801
802        // Simulate execution flow
803        hook.on_phase_change(ExecutionPhase::Executing, &state);
804        hook.on_node_start(0, &state);
805        thread::sleep(Duration::from_millis(1));
806        hook.on_node_complete(0, Duration::from_millis(1), &state);
807        hook.on_phase_change(ExecutionPhase::Completed, &state);
808
809        let data = hook.profiler().data();
810        assert!(!data.op_profiles.is_empty());
811        assert!(data.total_execution_time > Duration::ZERO);
812    }
813
814    #[test]
815    fn test_timeline_profiler() {
816        let mut profiler = TimelineProfiler::new();
817        profiler.start();
818
819        profiler.record_node_start(0, "einsum");
820        thread::sleep(Duration::from_millis(1));
821        profiler.record_node_end(0);
822
823        profiler.record_node_start(1, "relu");
824        thread::sleep(Duration::from_millis(1));
825        profiler.record_node_end(1);
826
827        let traces = profiler.traces();
828        assert_eq!(traces.len(), 2);
829        assert_eq!(traces[0].node_idx, 0);
830        assert_eq!(traces[1].node_idx, 1);
831    }
832
833    #[test]
834    fn test_timeline_summary() {
835        let mut profiler = TimelineProfiler::new();
836        profiler.start();
837
838        profiler.record_node_start(0, "einsum");
839        thread::sleep(Duration::from_millis(1));
840        profiler.record_node_end(0);
841
842        let summary = profiler.summary();
843        assert!(summary.contains("Node 0"));
844        assert!(summary.contains("einsum"));
845    }
846
847    #[test]
848    fn test_trace_entry() {
849        let entry = TraceEntry::new(
850            0,
851            Duration::from_millis(0),
852            Duration::from_millis(100),
853            "einsum",
854        );
855
856        assert_eq!(entry.node_idx, 0);
857        assert_eq!(entry.duration, Duration::from_millis(100));
858        assert_eq!(entry.op_type, "einsum");
859    }
860
861    #[test]
862    fn test_profile_statistics_empty() {
863        let stats = ProfileStatistics::from_durations(&mut []);
864        assert_eq!(stats.mean, 0.0);
865        assert_eq!(stats.median, 0.0);
866    }
867
868    #[test]
869    fn test_profile_statistics() {
870        let mut durations = vec![
871            Duration::from_millis(10),
872            Duration::from_millis(20),
873            Duration::from_millis(30),
874            Duration::from_millis(40),
875            Duration::from_millis(50),
876        ];
877
878        let stats = ProfileStatistics::from_durations(&mut durations);
879        assert!(stats.mean > 0.0);
880        assert!(stats.p50 > 0.0);
881        assert!(stats.p90 > 0.0);
882        assert!(stats.p95 > 0.0);
883        assert!(stats.p99 > 0.0);
884        assert!(stats.p99 >= stats.p95);
885        assert!(stats.p95 >= stats.p90);
886        assert!(stats.p90 >= stats.p50);
887    }
888
889    #[test]
890    fn test_performance_baseline_creation() {
891        let mut data = ProfileData::new();
892        data.record_op("einsum", Duration::from_millis(100));
893        data.record_op("relu", Duration::from_millis(50));
894        data.total_execution_time = Duration::from_millis(150);
895
896        let baseline = PerformanceBaseline::from_profile_data(&data);
897        assert_eq!(baseline.op_baselines.len(), 2);
898        assert!(baseline.op_baselines.contains_key("einsum"));
899        assert!(baseline.op_baselines.contains_key("relu"));
900    }
901
902    #[test]
903    fn test_performance_comparison_no_regression() {
904        let mut baseline_data = ProfileData::new();
905        baseline_data.record_op("einsum", Duration::from_millis(100));
906        baseline_data.total_execution_time = Duration::from_millis(100);
907
908        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
909
910        let mut current_data = ProfileData::new();
911        current_data.record_op("einsum", Duration::from_millis(105)); // 5% slower
912        current_data.total_execution_time = Duration::from_millis(105);
913
914        let comparison = baseline.compare(&current_data);
915        assert!(!comparison.is_regression); // Within 10% threshold
916    }
917
918    #[test]
919    fn test_performance_comparison_with_regression() {
920        let mut baseline_data = ProfileData::new();
921        baseline_data.record_op("einsum", Duration::from_millis(100));
922        baseline_data.total_execution_time = Duration::from_millis(100);
923
924        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
925
926        let mut current_data = ProfileData::new();
927        current_data.record_op("einsum", Duration::from_millis(120)); // 20% slower
928        current_data.total_execution_time = Duration::from_millis(120);
929
930        let comparison = baseline.compare(&current_data);
931        assert!(comparison.is_regression);
932        assert!(comparison.total_time_ratio > 1.1);
933    }
934
935    #[test]
936    fn test_performance_comparison_summary() {
937        let mut baseline_data = ProfileData::new();
938        baseline_data.record_op("einsum", Duration::from_millis(100));
939        baseline_data.total_execution_time = Duration::from_millis(100);
940
941        let baseline = PerformanceBaseline::from_profile_data(&baseline_data);
942
943        let mut current_data = ProfileData::new();
944        current_data.record_op("einsum", Duration::from_millis(150));
945        current_data.total_execution_time = Duration::from_millis(150);
946
947        let comparison = baseline.compare(&current_data);
948        let summary = comparison.summary();
949        assert!(summary.contains("REGRESSION") || summary.contains("1."));
950    }
951
952    #[test]
953    fn test_bottleneck_analyzer_no_bottlenecks() {
954        let mut data = ProfileData::new();
955        data.record_op("op1", Duration::from_millis(5));
956        data.record_op("op2", Duration::from_millis(5));
957        data.total_execution_time = Duration::from_millis(100);
958
959        let report = BottleneckAnalyzer::analyze(&data);
960        assert!(report.bottlenecks.is_empty());
961        assert!(!report.has_bottlenecks());
962    }
963
964    #[test]
965    fn test_bottleneck_analyzer_with_bottleneck() {
966        let mut data = ProfileData::new();
967        data.record_op("slow_op", Duration::from_millis(50));
968        data.record_op("fast_op", Duration::from_millis(5));
969        data.total_execution_time = Duration::from_millis(100);
970
971        let report = BottleneckAnalyzer::analyze(&data);
972        assert!(!report.bottlenecks.is_empty());
973        assert_eq!(report.bottlenecks[0].op_type, "slow_op");
974        assert!(report.bottlenecks[0].percentage > 10.0);
975    }
976
977    #[test]
978    fn test_bottleneck_analyzer_memory_pressure() {
979        let mut data = ProfileData::new();
980        data.total_execution_time = Duration::from_millis(100);
981        data.memory.record_allocation(2_000_000_000); // 2GB
982
983        let report = BottleneckAnalyzer::analyze(&data);
984        assert!(report.memory_pressure_high);
985        assert!(report.has_bottlenecks());
986    }
987
988    #[test]
989    fn test_bottleneck_report_summary() {
990        let mut data = ProfileData::new();
991        data.record_op("slow_op", Duration::from_millis(60));
992        data.total_execution_time = Duration::from_millis(100);
993
994        let report = BottleneckAnalyzer::analyze(&data);
995        let summary = report.summary();
996        assert!(summary.contains("slow_op"));
997        assert!(summary.contains("%"));
998    }
999
1000    #[test]
1001    fn test_bottleneck_report_summary_no_issues() {
1002        let report = BottleneckReport::new();
1003        let summary = report.summary();
1004        assert!(summary.contains("No significant bottlenecks"));
1005    }
1006}