Skip to main content

tensorlogic_infer/
profiling_optimizer.rs

1//! Profiling-guided optimization for adaptive performance tuning.
2//!
3//! This module provides runtime profiling and adaptive optimization:
4//! - **Profile collection**: Gather execution statistics during runtime
5//! - **Hotspot detection**: Identify performance bottlenecks
6//! - **Adaptive optimization**: Adjust strategy based on observed behavior
7//! - **A/B testing**: Compare optimization strategies
8//! - **Auto-tuning**: Automatically select best configurations
9//!
10//! ## Example
11//!
12//! ```rust,ignore
13//! use tensorlogic_infer::{ProfilingOptimizer, OptimizationGoal, TuningConfig};
14//!
15//! // Create profiling-guided optimizer
16//! let mut optimizer = ProfilingOptimizer::new()
17//!     .with_goal(OptimizationGoal::MinimizeLatency)
18//!     .with_tuning_enabled(true);
19//!
20//! // Execute with profiling
21//! for batch in dataset {
22//!     let result = optimizer.execute_and_profile(&graph, &batch)?;
23//!
24//!     // Optimizer automatically adapts based on observed performance
25//!     if optimizer.should_reoptimize() {
26//!         optimizer.apply_optimizations(&graph)?;
27//!     }
28//! }
29//!
30//! // Get optimization report
31//! let report = optimizer.generate_report();
32//! println!("Speedup: {:.2}x", report.speedup);
33//! ```
34
35use serde::{Deserialize, Serialize};
36use thiserror::Error;
37
/// Profiling-guided optimization errors.
///
/// Each variant wraps a free-form detail string; the `#[error(...)]` attribute
/// on the variant gives the message prefix that the detail is appended to.
#[derive(Error, Debug, Clone, PartialEq)]
pub enum ProfilingOptimizerError {
    /// Message: `Insufficient profiling data: <detail>`.
    #[error("Insufficient profiling data: {0}")]
    InsufficientData(String),

    /// Message: `Optimization failed: <detail>`.
    #[error("Optimization failed: {0}")]
    OptimizationFailed(String),

    /// Message: `Invalid configuration: <detail>`.
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// Message: `Tuning failed: <detail>`.
    #[error("Tuning failed: {0}")]
    TuningFailed(String),
}
53
/// Optimization goal.
///
/// The goal is stored on the optimizer and echoed in the generated report.
/// NOTE(review): nothing visible in this file branches on the selected
/// variant — confirm whether scoring is meant to depend on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OptimizationGoal {
    /// Minimize latency (single request)
    MinimizeLatency,

    /// Maximize throughput (requests/second)
    MaximizeThroughput,

    /// Minimize memory usage
    MinimizeMemory,

    /// Balance latency and throughput
    Balanced,

    /// Minimize energy consumption
    MinimizeEnergy,
}
72
/// Execution profile for a single run.
///
/// Only the execution time and memory figures are supplied to
/// `ExecutionProfile::new`; the remaining counters are public fields that
/// start at zero and are filled in by the caller.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ExecutionProfile {
    /// Execution time (microseconds)
    pub execution_time_us: u64,

    /// Memory used (bytes)
    pub memory_bytes: usize,

    /// Operations executed
    pub operations_count: usize,

    /// Cache hit rate
    ///
    /// NOTE(review): presumably a fraction in `0.0..=1.0` (the optimizer
    /// flags averages below `0.7`) — confirm.
    pub cache_hit_rate: f64,

    /// Parallelism utilization
    ///
    /// NOTE(review): presumably a fraction in `0.0..=1.0` (the optimizer
    /// flags averages below `0.5`) — confirm.
    pub parallelism_utilization: f64,

    /// Timestamp
    ///
    /// Set to the system clock reading at construction time by
    /// `ExecutionProfile::new`.
    pub timestamp: std::time::SystemTime,
}
94
95impl ExecutionProfile {
96    /// Create a new execution profile.
97    pub fn new(execution_time_us: u64, memory_bytes: usize) -> Self {
98        Self {
99            execution_time_us,
100            memory_bytes,
101            operations_count: 0,
102            cache_hit_rate: 0.0,
103            parallelism_utilization: 0.0,
104            timestamp: std::time::SystemTime::now(),
105        }
106    }
107
108    /// Get execution time in milliseconds.
109    pub fn execution_time_ms(&self) -> f64 {
110        self.execution_time_us as f64 / 1000.0
111    }
112
113    /// Get memory in megabytes.
114    pub fn memory_mb(&self) -> f64 {
115        self.memory_bytes as f64 / (1024.0 * 1024.0)
116    }
117
118    /// Get throughput (operations per second).
119    pub fn throughput(&self) -> f64 {
120        if self.execution_time_us > 0 {
121            (self.operations_count as f64) / (self.execution_time_us as f64 / 1_000_000.0)
122        } else {
123            0.0
124        }
125    }
126}
127
/// Hotspot in the computation graph.
///
/// A plain data record; the derived helpers on `Hotspot` only combine these
/// fields and never modify them.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Hotspot {
    /// Node or operation identifier
    pub identifier: String,

    /// Percentage of total execution time
    ///
    /// Expressed on a 0–100 scale: `is_critical` compares it against `10.0`.
    pub time_percentage: f64,

    /// Number of executions
    pub execution_count: usize,

    /// Average time per execution (microseconds)
    pub avg_time_us: f64,

    /// Optimization suggestions
    pub suggestions: Vec<String>,
}
146
147impl Hotspot {
148    /// Check if this is a critical hotspot (>10% of time).
149    pub fn is_critical(&self) -> bool {
150        self.time_percentage > 10.0
151    }
152
153    /// Get total time spent (microseconds).
154    pub fn total_time_us(&self) -> f64 {
155        self.avg_time_us * self.execution_count as f64
156    }
157}
158
/// Optimization strategy configuration.
///
/// A set of independent feature toggles plus two numeric knobs for which `0`
/// means "auto". Ready-made presets are provided by `Default`,
/// `OptimizationStrategy::conservative` and `OptimizationStrategy::aggressive`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OptimizationStrategy {
    /// Enable operator fusion
    pub enable_fusion: bool,

    /// Enable constant folding
    pub enable_constant_folding: bool,

    /// Enable memory pooling
    pub enable_memory_pooling: bool,

    /// Enable parallel execution
    pub enable_parallelism: bool,

    /// Parallelism degree (0 = auto)
    pub parallelism_degree: usize,

    /// Enable SIMD optimizations
    pub enable_simd: bool,

    /// Enable sparse optimizations
    pub enable_sparse: bool,

    /// Batch size (0 = auto)
    pub batch_size: usize,
}
186
187impl Default for OptimizationStrategy {
188    fn default() -> Self {
189        Self {
190            enable_fusion: true,
191            enable_constant_folding: true,
192            enable_memory_pooling: true,
193            enable_parallelism: true,
194            parallelism_degree: 0,
195            enable_simd: true,
196            enable_sparse: false,
197            batch_size: 0,
198        }
199    }
200}
201
202impl OptimizationStrategy {
203    /// Create a conservative strategy (minimal optimizations).
204    pub fn conservative() -> Self {
205        Self {
206            enable_fusion: false,
207            enable_constant_folding: true,
208            enable_memory_pooling: false,
209            enable_parallelism: false,
210            parallelism_degree: 1,
211            enable_simd: false,
212            enable_sparse: false,
213            batch_size: 1,
214        }
215    }
216
217    /// Create an aggressive strategy (maximum optimizations).
218    pub fn aggressive() -> Self {
219        Self {
220            enable_fusion: true,
221            enable_constant_folding: true,
222            enable_memory_pooling: true,
223            enable_parallelism: true,
224            parallelism_degree: 0, // Auto
225            enable_simd: true,
226            enable_sparse: true,
227            batch_size: 0, // Auto
228        }
229    }
230
231    /// Score this strategy based on profile.
232    pub fn score(&self, profile: &ExecutionProfile) -> f64 {
233        let mut score = 0.0;
234
235        // Faster execution is better
236        score += 1000.0 / profile.execution_time_ms().max(0.1);
237
238        // Less memory is better
239        score += 100.0 / profile.memory_mb().max(0.1);
240
241        // Higher cache hit rate is better
242        score += profile.cache_hit_rate * 50.0;
243
244        // Higher parallelism is better
245        score += profile.parallelism_utilization * 30.0;
246
247        score
248    }
249}
250
/// Tuning configuration.
///
/// NOTE(review): within this file only `measurement_runs` is read (it is
/// copied into the optimization report); the other fields are stored but not
/// consulted here — confirm where they are consumed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TuningConfig {
    /// Number of warmup runs
    pub warmup_runs: usize,

    /// Number of measurement runs per configuration
    pub measurement_runs: usize,

    /// Enable A/B testing
    pub enable_ab_testing: bool,

    /// Statistical significance level (0.0-1.0)
    pub significance_level: f64,

    /// Maximum tuning time (seconds)
    pub max_tuning_time_secs: u64,
}
269
270impl Default for TuningConfig {
271    fn default() -> Self {
272        Self {
273            warmup_runs: 3,
274            measurement_runs: 5,
275            enable_ab_testing: true,
276            significance_level: 0.05,
277            max_tuning_time_secs: 300,
278        }
279    }
280}
281
/// Profiling-guided optimizer.
///
/// Accumulates `ExecutionProfile`s, tracks the best-scoring strategy seen so
/// far, and can produce hotspots, tuning decisions and a summary report from
/// the collected data. All fields are private; configure the optimizer through
/// the `with_*` builder methods.
pub struct ProfilingOptimizer {
    /// Optimization goal
    goal: OptimizationGoal,

    /// Current optimization strategy
    current_strategy: OptimizationStrategy,

    /// Collected profiles
    profiles: Vec<ExecutionProfile>,

    /// Detected hotspots
    hotspots: Vec<Hotspot>,

    /// Tuning configuration
    tuning_config: TuningConfig,

    /// Enable auto-tuning
    ///
    /// NOTE(review): set by `with_tuning_enabled`, but no code in this file
    /// reads the flag — confirm where it is meant to take effect.
    auto_tuning_enabled: bool,

    /// Number of executions since last optimization
    executions_since_optimization: usize,

    /// Reoptimization threshold
    ///
    /// `should_reoptimize` returns true once the execution counter reaches
    /// this value; `new` initialises it to 100.
    reoptimization_threshold: usize,

    /// Best observed strategy
    best_strategy: Option<OptimizationStrategy>,

    /// Best observed score
    ///
    /// Starts at `0.0`, so only strictly positive scores can become the best.
    best_score: f64,
}
314
315impl ProfilingOptimizer {
316    /// Create a new profiling optimizer.
317    pub fn new() -> Self {
318        Self {
319            goal: OptimizationGoal::Balanced,
320            current_strategy: OptimizationStrategy::default(),
321            profiles: Vec::new(),
322            hotspots: Vec::new(),
323            tuning_config: TuningConfig::default(),
324            auto_tuning_enabled: false,
325            executions_since_optimization: 0,
326            reoptimization_threshold: 100,
327            best_strategy: None,
328            best_score: 0.0,
329        }
330    }
331
332    /// Set the optimization goal.
333    pub fn with_goal(mut self, goal: OptimizationGoal) -> Self {
334        self.goal = goal;
335        self
336    }
337
338    /// Enable or disable auto-tuning.
339    pub fn with_tuning_enabled(mut self, enabled: bool) -> Self {
340        self.auto_tuning_enabled = enabled;
341        self
342    }
343
344    /// Set the tuning configuration.
345    pub fn with_tuning_config(mut self, config: TuningConfig) -> Self {
346        self.tuning_config = config;
347        self
348    }
349
350    /// Set the current optimization strategy.
351    pub fn with_strategy(mut self, strategy: OptimizationStrategy) -> Self {
352        self.current_strategy = strategy;
353        self
354    }
355
356    /// Record an execution profile.
357    pub fn record_profile(&mut self, profile: ExecutionProfile) {
358        self.profiles.push(profile.clone());
359        self.executions_since_optimization += 1;
360
361        // Update best strategy if this is better
362        let score = self.current_strategy.score(&profile);
363        if score > self.best_score {
364            self.best_score = score;
365            self.best_strategy = Some(self.current_strategy.clone());
366        }
367
368        // Trim old profiles
369        if self.profiles.len() > 1000 {
370            self.profiles.drain(0..500);
371        }
372    }
373
374    /// Check if reoptimization should be triggered.
375    pub fn should_reoptimize(&self) -> bool {
376        self.executions_since_optimization >= self.reoptimization_threshold
377    }
378
379    /// Detect hotspots from collected profiles.
380    pub fn detect_hotspots(&mut self) -> Vec<Hotspot> {
381        if self.profiles.is_empty() {
382            return Vec::new();
383        }
384
385        // Simplified hotspot detection
386        let mut hotspots = Vec::new();
387
388        // Example: Create a hotspot for overall execution
389        let total_time: u64 = self.profiles.iter().map(|p| p.execution_time_us).sum();
390        let avg_time = total_time as f64 / self.profiles.len() as f64;
391
392        let hotspot = Hotspot {
393            identifier: "overall_execution".to_string(),
394            time_percentage: 100.0,
395            execution_count: self.profiles.len(),
396            avg_time_us: avg_time,
397            suggestions: self.generate_suggestions(),
398        };
399
400        hotspots.push(hotspot);
401        self.hotspots = hotspots.clone();
402
403        hotspots
404    }
405
406    /// Generate optimization suggestions based on profiles.
407    fn generate_suggestions(&self) -> Vec<String> {
408        let mut suggestions = Vec::new();
409
410        if self.profiles.is_empty() {
411            return suggestions;
412        }
413
414        let avg_profile = self.average_profile();
415
416        // Memory-based suggestions
417        if avg_profile.memory_mb() > 1000.0 {
418            suggestions.push("Consider enabling memory pooling to reduce allocations".to_string());
419        }
420
421        // Parallelism suggestions
422        if avg_profile.parallelism_utilization < 0.5 {
423            suggestions
424                .push("Low parallelism utilization - consider increasing batch size".to_string());
425        }
426
427        // Cache suggestions
428        if avg_profile.cache_hit_rate < 0.7 {
429            suggestions.push("Low cache hit rate - consider data layout optimization".to_string());
430        }
431
432        suggestions
433    }
434
435    /// Compute average profile.
436    fn average_profile(&self) -> ExecutionProfile {
437        if self.profiles.is_empty() {
438            return ExecutionProfile::new(0, 0);
439        }
440
441        let n = self.profiles.len() as f64;
442        let avg_time = self
443            .profiles
444            .iter()
445            .map(|p| p.execution_time_us)
446            .sum::<u64>() as f64
447            / n;
448        let avg_memory = self.profiles.iter().map(|p| p.memory_bytes).sum::<usize>() as f64 / n;
449
450        ExecutionProfile {
451            execution_time_us: avg_time as u64,
452            memory_bytes: avg_memory as usize,
453            operations_count: (self
454                .profiles
455                .iter()
456                .map(|p| p.operations_count)
457                .sum::<usize>() as f64
458                / n) as usize,
459            cache_hit_rate: self.profiles.iter().map(|p| p.cache_hit_rate).sum::<f64>() / n,
460            parallelism_utilization: self
461                .profiles
462                .iter()
463                .map(|p| p.parallelism_utilization)
464                .sum::<f64>()
465                / n,
466            timestamp: std::time::SystemTime::now(),
467        }
468    }
469
470    /// Run auto-tuning to find best strategy.
471    pub fn auto_tune(&mut self) -> Result<OptimizationStrategy, ProfilingOptimizerError> {
472        let strategies = vec![
473            OptimizationStrategy::conservative(),
474            OptimizationStrategy::default(),
475            OptimizationStrategy::aggressive(),
476        ];
477
478        let mut best_strategy = strategies[0].clone();
479        let mut best_score = 0.0;
480
481        // Simulate evaluation of each strategy
482        for strategy in strategies {
483            // In real implementation, would actually execute with this strategy
484            let profile = self.average_profile();
485            let score = strategy.score(&profile);
486
487            if score > best_score {
488                best_score = score;
489                best_strategy = strategy.clone();
490            }
491        }
492
493        self.current_strategy = best_strategy.clone();
494        self.best_strategy = Some(best_strategy.clone());
495        self.best_score = best_score;
496
497        Ok(best_strategy)
498    }
499
500    /// Generate optimization report.
501    pub fn generate_report(&self) -> OptimizationReport {
502        let baseline_profile = self.profiles.first();
503        let current_profile = self.profiles.last();
504
505        let speedup = if let (Some(baseline), Some(current)) = (baseline_profile, current_profile) {
506            baseline.execution_time_us as f64 / current.execution_time_us.max(1) as f64
507        } else {
508            1.0
509        };
510
511        OptimizationReport {
512            goal: self.goal,
513            total_profiles: self.profiles.len(),
514            hotspots_detected: self.hotspots.len(),
515            current_strategy: self.current_strategy.clone(),
516            best_strategy: self.best_strategy.clone(),
517            speedup,
518            memory_reduction: 0.0, // Would calculate from profiles
519            tuning_runs: self.tuning_config.measurement_runs,
520        }
521    }
522
523    /// Reset optimizer state.
524    pub fn reset(&mut self) {
525        self.profiles.clear();
526        self.hotspots.clear();
527        self.executions_since_optimization = 0;
528        self.best_strategy = None;
529        self.best_score = 0.0;
530    }
531}
532
533impl Default for ProfilingOptimizer {
534    fn default() -> Self {
535        Self::new()
536    }
537}
538
/// Optimization report.
///
/// A snapshot produced by `ProfilingOptimizer::generate_report`; its `Display`
/// implementation renders a human-readable summary.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OptimizationReport {
    /// Optimization goal
    pub goal: OptimizationGoal,

    /// Total profiles collected
    pub total_profiles: usize,

    /// Hotspots detected
    pub hotspots_detected: usize,

    /// Current strategy
    pub current_strategy: OptimizationStrategy,

    /// Best strategy found
    pub best_strategy: Option<OptimizationStrategy>,

    /// Speedup achieved
    ///
    /// Ratio of the first retained profile's execution time to the last
    /// one's; `1.0` when no profiles were available.
    pub speedup: f64,

    /// Memory reduction (percentage)
    pub memory_reduction: f64,

    /// Tuning runs performed
    ///
    /// NOTE(review): `generate_report` fills this from the configured
    /// `measurement_runs`, not from a count of runs actually executed.
    pub tuning_runs: usize,
}
566
567impl std::fmt::Display for OptimizationReport {
568    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
569        writeln!(f, "Profiling-Guided Optimization Report")?;
570        writeln!(f, "=====================================")?;
571        writeln!(f, "Goal:              {:?}", self.goal)?;
572        writeln!(f, "Profiles:          {}", self.total_profiles)?;
573        writeln!(f, "Hotspots:          {}", self.hotspots_detected)?;
574        writeln!(f, "Speedup:           {:.2}x", self.speedup)?;
575        writeln!(f, "Memory reduction:  {:.1}%", self.memory_reduction)?;
576        writeln!(f, "Tuning runs:       {}", self.tuning_runs)?;
577
578        if let Some(best) = &self.best_strategy {
579            writeln!(f, "\nBest Strategy:")?;
580            writeln!(f, "  Fusion:          {}", best.enable_fusion)?;
581            writeln!(f, "  Parallelism:     {}", best.enable_parallelism)?;
582            writeln!(f, "  SIMD:            {}", best.enable_simd)?;
583        }
584
585        Ok(())
586    }
587}
588
// Unit tests for the profiling optimizer. They live in the same module so
// they can read and set the optimizer's private fields directly.
#[cfg(test)]
mod tests {
    use super::*;

    // Constructor stores its arguments; unit conversions are correct.
    #[test]
    fn test_execution_profile() {
        let profile = ExecutionProfile::new(1000, 1024 * 1024);
        assert_eq!(profile.execution_time_us, 1000);
        assert_eq!(profile.memory_bytes, 1024 * 1024);
        assert_eq!(profile.execution_time_ms(), 1.0);
        assert!((profile.memory_mb() - 1.0).abs() < 0.01);
    }

    // 1000 operations in one second is 1000 ops/s.
    #[test]
    fn test_execution_profile_throughput() {
        let mut profile = ExecutionProfile::new(1_000_000, 0);
        profile.operations_count = 1000;
        assert_eq!(profile.throughput(), 1000.0);
    }

    // 15% of total time is above the 10% criticality threshold.
    #[test]
    fn test_hotspot_is_critical() {
        let hotspot = Hotspot {
            identifier: "op1".to_string(),
            time_percentage: 15.0,
            execution_count: 100,
            avg_time_us: 100.0,
            suggestions: Vec::new(),
        };

        assert!(hotspot.is_critical());
    }

    // Total time is average time multiplied by execution count.
    #[test]
    fn test_hotspot_total_time() {
        let hotspot = Hotspot {
            identifier: "op1".to_string(),
            time_percentage: 10.0,
            execution_count: 100,
            avg_time_us: 50.0,
            suggestions: Vec::new(),
        };

        assert_eq!(hotspot.total_time_us(), 5000.0);
    }

    // The default preset enables fusion and parallelism.
    #[test]
    fn test_optimization_strategy_default() {
        let strategy = OptimizationStrategy::default();
        assert!(strategy.enable_fusion);
        assert!(strategy.enable_parallelism);
    }

    // The conservative preset disables fusion and parallelism.
    #[test]
    fn test_optimization_strategy_conservative() {
        let strategy = OptimizationStrategy::conservative();
        assert!(!strategy.enable_fusion);
        assert!(!strategy.enable_parallelism);
    }

    // The aggressive preset enables fusion, parallelism and SIMD.
    #[test]
    fn test_optimization_strategy_aggressive() {
        let strategy = OptimizationStrategy::aggressive();
        assert!(strategy.enable_fusion);
        assert!(strategy.enable_parallelism);
        assert!(strategy.enable_simd);
    }

    // A fresh optimizer targets the balanced goal and has no profiles.
    #[test]
    fn test_profiling_optimizer_creation() {
        let optimizer = ProfilingOptimizer::new();
        assert_eq!(optimizer.goal, OptimizationGoal::Balanced);
        assert_eq!(optimizer.profiles.len(), 0);
    }

    // The builder overrides the goal.
    #[test]
    fn test_profiling_optimizer_with_goal() {
        let optimizer = ProfilingOptimizer::new().with_goal(OptimizationGoal::MinimizeLatency);
        assert_eq!(optimizer.goal, OptimizationGoal::MinimizeLatency);
    }

    // Recording stores the profile and bumps the execution counter.
    #[test]
    fn test_profiling_optimizer_record_profile() {
        let mut optimizer = ProfilingOptimizer::new();
        let profile = ExecutionProfile::new(1000, 1024);

        optimizer.record_profile(profile);
        assert_eq!(optimizer.profiles.len(), 1);
        assert_eq!(optimizer.executions_since_optimization, 1);
    }

    // Reoptimization triggers once the threshold number of runs is recorded.
    #[test]
    fn test_profiling_optimizer_should_reoptimize() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.reoptimization_threshold = 5;

        assert!(!optimizer.should_reoptimize());

        for _ in 0..5 {
            optimizer.record_profile(ExecutionProfile::new(1000, 1024));
        }

        assert!(optimizer.should_reoptimize());
    }

    // At least one hotspot is reported when profiles exist.
    #[test]
    fn test_profiling_optimizer_detect_hotspots() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        let hotspots = optimizer.detect_hotspots();
        assert!(!hotspots.is_empty());
    }

    // Auto-tuning with recorded data succeeds and records a best strategy.
    #[test]
    fn test_profiling_optimizer_auto_tune() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        let result = optimizer.auto_tune();
        assert!(result.is_ok());
        assert!(optimizer.best_strategy.is_some());
    }

    // Going from 2000 us to 1000 us is reported as a speedup above 1.
    #[test]
    fn test_profiling_optimizer_generate_report() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(2000, 1024));
        optimizer.record_profile(ExecutionProfile::new(1000, 512));

        let report = optimizer.generate_report();
        assert_eq!(report.total_profiles, 2);
        assert!(report.speedup > 1.0);
    }

    // Reset clears the history and the execution counter.
    #[test]
    fn test_profiling_optimizer_reset() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        optimizer.reset();
        assert_eq!(optimizer.profiles.len(), 0);
        assert_eq!(optimizer.executions_since_optimization, 0);
    }

    // Default tuning uses 3 warmup runs and 5 measurement runs.
    #[test]
    fn test_tuning_config_default() {
        let config = TuningConfig::default();
        assert_eq!(config.warmup_runs, 3);
        assert_eq!(config.measurement_runs, 5);
    }

    // The rendered report uses fixed-precision, column-aligned values.
    #[test]
    fn test_optimization_report_display() {
        let report = OptimizationReport {
            goal: OptimizationGoal::MinimizeLatency,
            total_profiles: 100,
            hotspots_detected: 5,
            current_strategy: OptimizationStrategy::default(),
            best_strategy: Some(OptimizationStrategy::aggressive()),
            speedup: 2.5,
            memory_reduction: 30.0,
            tuning_runs: 10,
        };

        let display = format!("{}", report);
        assert!(display.contains("Speedup:           2.50x"));
        assert!(display.contains("Memory reduction:  30.0%"));
    }
}