1use serde::{Deserialize, Serialize};
36use thiserror::Error;
37
/// Error type carried in the `Result` returned by `ProfilingOptimizer::auto_tune`.
///
/// Every variant wraps a free-form detail message that is interpolated into
/// the `Display` text declared by the variant's `#[error(...)]` attribute.
#[derive(Error, Debug, Clone, PartialEq)]
pub enum ProfilingOptimizerError {
    /// Displayed as `Insufficient profiling data: <message>`.
    #[error("Insufficient profiling data: {0}")]
    InsufficientData(String),

    /// Displayed as `Optimization failed: <message>`.
    #[error("Optimization failed: {0}")]
    OptimizationFailed(String),

    /// Displayed as `Invalid configuration: <message>`.
    #[error("Invalid configuration: {0}")]
    InvalidConfig(String),

    /// Displayed as `Tuning failed: <message>`.
    #[error("Tuning failed: {0}")]
    TuningFailed(String),
}
53
/// Objective an optimizer instance is configured for.
///
/// NOTE(review): in the code visible in this file the goal is only stored on
/// the optimizer and echoed in the generated report; nothing branches on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum OptimizationGoal {
    /// Favour low execution latency.
    MinimizeLatency,

    /// Favour high throughput.
    MaximizeThroughput,

    /// Favour a small memory footprint.
    MinimizeMemory,

    /// No single metric is favoured; this is what `ProfilingOptimizer::new` selects.
    Balanced,

    /// Favour low energy use.
    MinimizeEnergy,
}
72
/// Measurements captured for a single execution.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ExecutionProfile {
    /// Wall-clock duration of the execution, in microseconds.
    pub execution_time_us: u64,

    /// Memory attributed to the execution, in bytes.
    pub memory_bytes: usize,

    /// Number of operations performed; used as the numerator of `throughput`.
    pub operations_count: usize,

    /// Cache hit rate. Presumably a fraction in `0.0..=1.0` (the optimizer
    /// compares averages against `0.7`) — confirm with the producer.
    pub cache_hit_rate: f64,

    /// Parallelism utilization. Presumably a fraction in `0.0..=1.0` (the
    /// optimizer compares averages against `0.5`) — confirm with the producer.
    pub parallelism_utilization: f64,

    /// System time associated with the profile; `new` sets it to the moment
    /// of construction.
    pub timestamp: std::time::SystemTime,
}
94
95impl ExecutionProfile {
96 pub fn new(execution_time_us: u64, memory_bytes: usize) -> Self {
98 Self {
99 execution_time_us,
100 memory_bytes,
101 operations_count: 0,
102 cache_hit_rate: 0.0,
103 parallelism_utilization: 0.0,
104 timestamp: std::time::SystemTime::now(),
105 }
106 }
107
108 pub fn execution_time_ms(&self) -> f64 {
110 self.execution_time_us as f64 / 1000.0
111 }
112
113 pub fn memory_mb(&self) -> f64 {
115 self.memory_bytes as f64 / (1024.0 * 1024.0)
116 }
117
118 pub fn throughput(&self) -> f64 {
120 if self.execution_time_us > 0 {
121 (self.operations_count as f64) / (self.execution_time_us as f64 / 1_000_000.0)
122 } else {
123 0.0
124 }
125 }
126}
127
/// A portion of execution that accounts for a measured share of total time,
/// together with textual optimization suggestions.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Hotspot {
    /// Name identifying the hotspot.
    pub identifier: String,

    /// Share of total time, expressed in percent (`is_critical` compares it
    /// against `10.0`, and the optimizer reports the overall hotspot as `100.0`).
    pub time_percentage: f64,

    /// Number of executions the hotspot was observed over.
    pub execution_count: usize,

    /// Mean time per execution, in microseconds.
    pub avg_time_us: f64,

    /// Human-readable optimization suggestions attached to this hotspot.
    pub suggestions: Vec<String>,
}
146
147impl Hotspot {
148 pub fn is_critical(&self) -> bool {
150 self.time_percentage > 10.0
151 }
152
153 pub fn total_time_us(&self) -> f64 {
155 self.avg_time_us * self.execution_count as f64
156 }
157}
158
/// A set of optimization switches and sizing parameters.
///
/// Presets are available through `Default`, `conservative()` and
/// `aggressive()`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OptimizationStrategy {
    /// Whether fusion is enabled.
    pub enable_fusion: bool,

    /// Whether constant folding is enabled.
    pub enable_constant_folding: bool,

    /// Whether memory pooling is enabled.
    pub enable_memory_pooling: bool,

    /// Whether parallel execution is enabled.
    pub enable_parallelism: bool,

    /// Degree of parallelism. The default and aggressive presets use `0`,
    /// presumably meaning "choose automatically" — TODO confirm.
    pub parallelism_degree: usize,

    /// Whether SIMD is enabled.
    pub enable_simd: bool,

    /// Whether sparse optimizations are enabled.
    pub enable_sparse: bool,

    /// Batch size. The default and aggressive presets use `0`, presumably
    /// meaning "choose automatically" — TODO confirm.
    pub batch_size: usize,
}
186
187impl Default for OptimizationStrategy {
188 fn default() -> Self {
189 Self {
190 enable_fusion: true,
191 enable_constant_folding: true,
192 enable_memory_pooling: true,
193 enable_parallelism: true,
194 parallelism_degree: 0,
195 enable_simd: true,
196 enable_sparse: false,
197 batch_size: 0,
198 }
199 }
200}
201
202impl OptimizationStrategy {
203 pub fn conservative() -> Self {
205 Self {
206 enable_fusion: false,
207 enable_constant_folding: true,
208 enable_memory_pooling: false,
209 enable_parallelism: false,
210 parallelism_degree: 1,
211 enable_simd: false,
212 enable_sparse: false,
213 batch_size: 1,
214 }
215 }
216
217 pub fn aggressive() -> Self {
219 Self {
220 enable_fusion: true,
221 enable_constant_folding: true,
222 enable_memory_pooling: true,
223 enable_parallelism: true,
224 parallelism_degree: 0, enable_simd: true,
226 enable_sparse: true,
227 batch_size: 0, }
229 }
230
231 pub fn score(&self, profile: &ExecutionProfile) -> f64 {
233 let mut score = 0.0;
234
235 score += 1000.0 / profile.execution_time_ms().max(0.1);
237
238 score += 100.0 / profile.memory_mb().max(0.1);
240
241 score += profile.cache_hit_rate * 50.0;
243
244 score += profile.parallelism_utilization * 30.0;
246
247 score
248 }
249}
250
/// Settings for the tuning process.
///
/// NOTE(review): of these fields, only `measurement_runs` is read by the
/// optimizer code visible in this file (it is echoed in the report).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TuningConfig {
    /// Number of warm-up runs (default `3`).
    pub warmup_runs: usize,

    /// Number of measurement runs (default `5`).
    pub measurement_runs: usize,

    /// Whether A/B testing is enabled (default `true`).
    pub enable_ab_testing: bool,

    /// Statistical significance level (default `0.05`).
    pub significance_level: f64,

    /// Upper bound on tuning time, in seconds (default `300`).
    pub max_tuning_time_secs: u64,
}
269
270impl Default for TuningConfig {
271 fn default() -> Self {
272 Self {
273 warmup_runs: 3,
274 measurement_runs: 5,
275 enable_ab_testing: true,
276 significance_level: 0.05,
277 max_tuning_time_secs: 300,
278 }
279 }
280}
281
/// Collects execution profiles and uses them to pick an optimization strategy.
pub struct ProfilingOptimizer {
    /// Configured optimization goal; echoed in the generated report.
    goal: OptimizationGoal,

    /// Strategy currently in effect; used to score each recorded profile.
    current_strategy: OptimizationStrategy,

    /// Recorded profiles, oldest first. `record_profile` keeps this bounded
    /// by discarding the oldest entries once the history grows past 1000.
    profiles: Vec<ExecutionProfile>,

    /// Result of the most recent successful `detect_hotspots` call.
    hotspots: Vec<Hotspot>,

    /// Tuning settings supplied via `with_tuning_config`.
    tuning_config: TuningConfig,

    /// Flag set via `with_tuning_enabled`.
    /// NOTE(review): not read anywhere in the code visible in this file.
    auto_tuning_enabled: bool,

    /// Count of profiles recorded since this counter was last zeroed.
    executions_since_optimization: usize,

    /// `should_reoptimize` returns `true` once the counter above reaches this
    /// value (initialised to `100`).
    reoptimization_threshold: usize,

    /// Best strategy seen so far, if any.
    best_strategy: Option<OptimizationStrategy>,

    /// Score associated with `best_strategy` (initialised to `0.0`).
    best_score: f64,
}
314
315impl ProfilingOptimizer {
316 pub fn new() -> Self {
318 Self {
319 goal: OptimizationGoal::Balanced,
320 current_strategy: OptimizationStrategy::default(),
321 profiles: Vec::new(),
322 hotspots: Vec::new(),
323 tuning_config: TuningConfig::default(),
324 auto_tuning_enabled: false,
325 executions_since_optimization: 0,
326 reoptimization_threshold: 100,
327 best_strategy: None,
328 best_score: 0.0,
329 }
330 }
331
332 pub fn with_goal(mut self, goal: OptimizationGoal) -> Self {
334 self.goal = goal;
335 self
336 }
337
338 pub fn with_tuning_enabled(mut self, enabled: bool) -> Self {
340 self.auto_tuning_enabled = enabled;
341 self
342 }
343
344 pub fn with_tuning_config(mut self, config: TuningConfig) -> Self {
346 self.tuning_config = config;
347 self
348 }
349
350 pub fn with_strategy(mut self, strategy: OptimizationStrategy) -> Self {
352 self.current_strategy = strategy;
353 self
354 }
355
356 pub fn record_profile(&mut self, profile: ExecutionProfile) {
358 self.profiles.push(profile.clone());
359 self.executions_since_optimization += 1;
360
361 let score = self.current_strategy.score(&profile);
363 if score > self.best_score {
364 self.best_score = score;
365 self.best_strategy = Some(self.current_strategy.clone());
366 }
367
368 if self.profiles.len() > 1000 {
370 self.profiles.drain(0..500);
371 }
372 }
373
374 pub fn should_reoptimize(&self) -> bool {
376 self.executions_since_optimization >= self.reoptimization_threshold
377 }
378
379 pub fn detect_hotspots(&mut self) -> Vec<Hotspot> {
381 if self.profiles.is_empty() {
382 return Vec::new();
383 }
384
385 let mut hotspots = Vec::new();
387
388 let total_time: u64 = self.profiles.iter().map(|p| p.execution_time_us).sum();
390 let avg_time = total_time as f64 / self.profiles.len() as f64;
391
392 let hotspot = Hotspot {
393 identifier: "overall_execution".to_string(),
394 time_percentage: 100.0,
395 execution_count: self.profiles.len(),
396 avg_time_us: avg_time,
397 suggestions: self.generate_suggestions(),
398 };
399
400 hotspots.push(hotspot);
401 self.hotspots = hotspots.clone();
402
403 hotspots
404 }
405
406 fn generate_suggestions(&self) -> Vec<String> {
408 let mut suggestions = Vec::new();
409
410 if self.profiles.is_empty() {
411 return suggestions;
412 }
413
414 let avg_profile = self.average_profile();
415
416 if avg_profile.memory_mb() > 1000.0 {
418 suggestions.push("Consider enabling memory pooling to reduce allocations".to_string());
419 }
420
421 if avg_profile.parallelism_utilization < 0.5 {
423 suggestions
424 .push("Low parallelism utilization - consider increasing batch size".to_string());
425 }
426
427 if avg_profile.cache_hit_rate < 0.7 {
429 suggestions.push("Low cache hit rate - consider data layout optimization".to_string());
430 }
431
432 suggestions
433 }
434
435 fn average_profile(&self) -> ExecutionProfile {
437 if self.profiles.is_empty() {
438 return ExecutionProfile::new(0, 0);
439 }
440
441 let n = self.profiles.len() as f64;
442 let avg_time = self
443 .profiles
444 .iter()
445 .map(|p| p.execution_time_us)
446 .sum::<u64>() as f64
447 / n;
448 let avg_memory = self.profiles.iter().map(|p| p.memory_bytes).sum::<usize>() as f64 / n;
449
450 ExecutionProfile {
451 execution_time_us: avg_time as u64,
452 memory_bytes: avg_memory as usize,
453 operations_count: (self
454 .profiles
455 .iter()
456 .map(|p| p.operations_count)
457 .sum::<usize>() as f64
458 / n) as usize,
459 cache_hit_rate: self.profiles.iter().map(|p| p.cache_hit_rate).sum::<f64>() / n,
460 parallelism_utilization: self
461 .profiles
462 .iter()
463 .map(|p| p.parallelism_utilization)
464 .sum::<f64>()
465 / n,
466 timestamp: std::time::SystemTime::now(),
467 }
468 }
469
470 pub fn auto_tune(&mut self) -> Result<OptimizationStrategy, ProfilingOptimizerError> {
472 let strategies = vec![
473 OptimizationStrategy::conservative(),
474 OptimizationStrategy::default(),
475 OptimizationStrategy::aggressive(),
476 ];
477
478 let mut best_strategy = strategies[0].clone();
479 let mut best_score = 0.0;
480
481 for strategy in strategies {
483 let profile = self.average_profile();
485 let score = strategy.score(&profile);
486
487 if score > best_score {
488 best_score = score;
489 best_strategy = strategy.clone();
490 }
491 }
492
493 self.current_strategy = best_strategy.clone();
494 self.best_strategy = Some(best_strategy.clone());
495 self.best_score = best_score;
496
497 Ok(best_strategy)
498 }
499
500 pub fn generate_report(&self) -> OptimizationReport {
502 let baseline_profile = self.profiles.first();
503 let current_profile = self.profiles.last();
504
505 let speedup = if let (Some(baseline), Some(current)) = (baseline_profile, current_profile) {
506 baseline.execution_time_us as f64 / current.execution_time_us.max(1) as f64
507 } else {
508 1.0
509 };
510
511 OptimizationReport {
512 goal: self.goal,
513 total_profiles: self.profiles.len(),
514 hotspots_detected: self.hotspots.len(),
515 current_strategy: self.current_strategy.clone(),
516 best_strategy: self.best_strategy.clone(),
517 speedup,
518 memory_reduction: 0.0, tuning_runs: self.tuning_config.measurement_runs,
520 }
521 }
522
523 pub fn reset(&mut self) {
525 self.profiles.clear();
526 self.hotspots.clear();
527 self.executions_since_optimization = 0;
528 self.best_strategy = None;
529 self.best_score = 0.0;
530 }
531}
532
533impl Default for ProfilingOptimizer {
534 fn default() -> Self {
535 Self::new()
536 }
537}
538
/// Snapshot of optimizer state produced by `ProfilingOptimizer::generate_report`.
///
/// Its `Display` implementation renders a multi-line textual report.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct OptimizationReport {
    /// Goal the optimizer was configured with.
    pub goal: OptimizationGoal,

    /// Number of profiles held by the optimizer when the report was built.
    pub total_profiles: usize,

    /// Number of hotspots stored on the optimizer when the report was built.
    pub hotspots_detected: usize,

    /// Strategy in effect when the report was built.
    pub current_strategy: OptimizationStrategy,

    /// Best strategy seen so far, if any.
    pub best_strategy: Option<OptimizationStrategy>,

    /// Speed-up factor; rendered by `Display` with an `x` suffix.
    pub speedup: f64,

    /// Memory reduction; rendered by `Display` with a `%` suffix.
    pub memory_reduction: f64,

    /// Number of tuning runs; the optimizer fills this from the configured
    /// measurement-run count.
    pub tuning_runs: usize,
}
566
567impl std::fmt::Display for OptimizationReport {
568 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
569 writeln!(f, "Profiling-Guided Optimization Report")?;
570 writeln!(f, "=====================================")?;
571 writeln!(f, "Goal: {:?}", self.goal)?;
572 writeln!(f, "Profiles: {}", self.total_profiles)?;
573 writeln!(f, "Hotspots: {}", self.hotspots_detected)?;
574 writeln!(f, "Speedup: {:.2}x", self.speedup)?;
575 writeln!(f, "Memory reduction: {:.1}%", self.memory_reduction)?;
576 writeln!(f, "Tuning runs: {}", self.tuning_runs)?;
577
578 if let Some(best) = &self.best_strategy {
579 writeln!(f, "\nBest Strategy:")?;
580 writeln!(f, " Fusion: {}", best.enable_fusion)?;
581 writeln!(f, " Parallelism: {}", best.enable_parallelism)?;
582 writeln!(f, " SIMD: {}", best.enable_simd)?;
583 }
584
585 Ok(())
586 }
587}
588
// Unit tests for the profile, hotspot, strategy, optimizer and report types.
#[cfg(test)]
mod tests {
    use super::*;

    // `new` stores its arguments and the unit conversions agree with them.
    #[test]
    fn test_execution_profile() {
        let profile = ExecutionProfile::new(1000, 1024 * 1024);
        assert_eq!(profile.execution_time_us, 1000);
        assert_eq!(profile.memory_bytes, 1024 * 1024);
        assert_eq!(profile.execution_time_ms(), 1.0);
        assert!((profile.memory_mb() - 1.0).abs() < 0.01);
    }

    // 1000 operations in one second is 1000 operations per second.
    #[test]
    fn test_execution_profile_throughput() {
        let mut profile = ExecutionProfile::new(1_000_000, 0);
        profile.operations_count = 1000;
        assert_eq!(profile.throughput(), 1000.0);
    }

    // A 15% share is above the critical threshold.
    #[test]
    fn test_hotspot_is_critical() {
        let hotspot = Hotspot {
            identifier: "op1".to_string(),
            time_percentage: 15.0,
            execution_count: 100,
            avg_time_us: 100.0,
            suggestions: Vec::new(),
        };

        assert!(hotspot.is_critical());
    }

    // Total time is average time multiplied by execution count.
    #[test]
    fn test_hotspot_total_time() {
        let hotspot = Hotspot {
            identifier: "op1".to_string(),
            time_percentage: 10.0,
            execution_count: 100,
            avg_time_us: 50.0,
            suggestions: Vec::new(),
        };

        assert_eq!(hotspot.total_time_us(), 5000.0);
    }

    // The default preset enables fusion and parallelism.
    #[test]
    fn test_optimization_strategy_default() {
        let strategy = OptimizationStrategy::default();
        assert!(strategy.enable_fusion);
        assert!(strategy.enable_parallelism);
    }

    // The conservative preset disables fusion and parallelism.
    #[test]
    fn test_optimization_strategy_conservative() {
        let strategy = OptimizationStrategy::conservative();
        assert!(!strategy.enable_fusion);
        assert!(!strategy.enable_parallelism);
    }

    // The aggressive preset enables fusion, parallelism and SIMD.
    #[test]
    fn test_optimization_strategy_aggressive() {
        let strategy = OptimizationStrategy::aggressive();
        assert!(strategy.enable_fusion);
        assert!(strategy.enable_parallelism);
        assert!(strategy.enable_simd);
    }

    // A fresh optimizer has the Balanced goal and no profiles.
    #[test]
    fn test_profiling_optimizer_creation() {
        let optimizer = ProfilingOptimizer::new();
        assert_eq!(optimizer.goal, OptimizationGoal::Balanced);
        assert_eq!(optimizer.profiles.len(), 0);
    }

    // `with_goal` overrides the goal.
    #[test]
    fn test_profiling_optimizer_with_goal() {
        let optimizer = ProfilingOptimizer::new().with_goal(OptimizationGoal::MinimizeLatency);
        assert_eq!(optimizer.goal, OptimizationGoal::MinimizeLatency);
    }

    // Recording a profile stores it and bumps the execution counter.
    #[test]
    fn test_profiling_optimizer_record_profile() {
        let mut optimizer = ProfilingOptimizer::new();
        let profile = ExecutionProfile::new(1000, 1024);

        optimizer.record_profile(profile);
        assert_eq!(optimizer.profiles.len(), 1);
        assert_eq!(optimizer.executions_since_optimization, 1);
    }

    // Re-optimization is requested once the counter reaches the threshold.
    #[test]
    fn test_profiling_optimizer_should_reoptimize() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.reoptimization_threshold = 5;

        assert!(!optimizer.should_reoptimize());

        for _ in 0..5 {
            optimizer.record_profile(ExecutionProfile::new(1000, 1024));
        }

        assert!(optimizer.should_reoptimize());
    }

    // With at least one profile, hotspot detection yields a result.
    #[test]
    fn test_profiling_optimizer_detect_hotspots() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        let hotspots = optimizer.detect_hotspots();
        assert!(!hotspots.is_empty());
    }

    // Auto-tuning after recording a profile succeeds and leaves a best strategy.
    #[test]
    fn test_profiling_optimizer_auto_tune() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        let result = optimizer.auto_tune();
        assert!(result.is_ok());
        assert!(optimizer.best_strategy.is_some());
    }

    // The report counts profiles and shows a speed-up when the latest run is faster.
    #[test]
    fn test_profiling_optimizer_generate_report() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(2000, 1024));
        optimizer.record_profile(ExecutionProfile::new(1000, 512));

        let report = optimizer.generate_report();
        assert_eq!(report.total_profiles, 2);
        assert!(report.speedup > 1.0);
    }

    // `reset` clears the profile history and the execution counter.
    #[test]
    fn test_profiling_optimizer_reset() {
        let mut optimizer = ProfilingOptimizer::new();
        optimizer.record_profile(ExecutionProfile::new(1000, 1024));

        optimizer.reset();
        assert_eq!(optimizer.profiles.len(), 0);
        assert_eq!(optimizer.executions_since_optimization, 0);
    }

    // Default tuning configuration uses 3 warm-up and 5 measurement runs.
    #[test]
    fn test_tuning_config_default() {
        let config = TuningConfig::default();
        assert_eq!(config.warmup_runs, 3);
        assert_eq!(config.measurement_runs, 5);
    }

    // The rendered report contains the formatted speed-up and memory figures.
    #[test]
    fn test_optimization_report_display() {
        let report = OptimizationReport {
            goal: OptimizationGoal::MinimizeLatency,
            total_profiles: 100,
            hotspots_detected: 5,
            current_strategy: OptimizationStrategy::default(),
            best_strategy: Some(OptimizationStrategy::aggressive()),
            speedup: 2.5,
            memory_reduction: 30.0,
            tuning_runs: 10,
        };

        let display = format!("{}", report);
        assert!(display.contains("Speedup: 2.50x"));
        assert!(display.contains("Memory reduction: 30.0%"));
    }
}