use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};

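/// Aggregated profiling data for a single program run: per-function,
/// per-loop, memory, branch, cache, and instruction-mix statistics.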
#[derive(Debug, Clone)]
pub struct PerformanceProfile {
    pub function_profiles: HashMap<String, FunctionProfile>,
    pub loop_profiles: HashMap<String, LoopProfile>,
    pub memory_access_patterns: HashMap<String, MemoryAccessPattern>,
    pub branch_predictions: HashMap<String, BranchProfile>,
    pub cache_statistics: CacheStatistics,
    pub instruction_mix: InstructionMix,
    pub profiling_duration: Duration,
    pub total_samples: u64,
}

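/// Timing and hardware-counter statistics for a single function, plus any
/// optimization opportunities detected for it.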
#[derive(Debug, Clone)]
pub struct FunctionProfile {
    pub name: String,
    pub total_time: Duration,
    pub self_time: Duration,
    pub call_count: u64,
    pub avg_time_per_call: Duration,
    pub max_time_per_call: Duration,
    pub min_time_per_call: Duration,
    pub cpu_cycles: u64,
    pub cache_misses: u64,
    pub branch_misses: u64,
    pub hotness_score: f64,
    pub optimization_opportunities: Vec<OptimizationOpportunity>,
}

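/// Profiling data for a single loop, including vectorization efficiency and
/// dependency information.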
#[derive(Debug, Clone)]
pub struct LoopProfile {
    pub loop_id: String,
    pub location: String,
    pub iteration_count: u64,
    pub total_time: Duration,
    pub avg_time_per_iteration: Duration,
    pub vectorization_efficiency: f64,
    pub dependency_chains: Vec<DependencyChain>,
    pub memory_access_stride: i64,
    pub loop_carried_dependencies: u32,
    pub optimization_potential: f64,
}

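/// Observed memory access behaviour of a function: access type, stride,
/// cache hit rate, prefetch effectiveness, and NUMA locality.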
#[derive(Debug, Clone)]
pub struct MemoryAccessPattern {
    pub function_name: String,
    pub access_type: MemoryAccessType,
    pub access_frequency: u64,
    pub cache_hit_rate: f64,
    pub average_latency: Duration,
    pub stride_pattern: StridePattern,
    pub prefetch_effectiveness: f64,
    pub numa_locality: f64,
}

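/// Taken/not-taken counts and prediction statistics for a single branch.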
#[derive(Debug, Clone)]
pub struct BranchProfile {
    pub branch_id: String,
    pub location: String,
    pub taken_count: u64,
    pub not_taken_count: u64,
    pub prediction_accuracy: f64,
    pub misprediction_penalty: Duration,
    pub branch_type: BranchType,
}

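/// Program-wide cache and TLB hit rates plus related utilization metrics.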
#[derive(Debug, Clone)]
pub struct CacheStatistics {
    pub l1_hit_rate: f64,
    pub l2_hit_rate: f64,
    pub l3_hit_rate: f64,
    pub tlb_hit_rate: f64,
    pub cache_line_utilization: f64,
    pub false_sharing_incidents: u64,
    pub prefetch_accuracy: f64,
}

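/// Breakdown of executed instructions by category, with SIMD and
/// parallelism utilization estimates.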
#[derive(Debug, Clone)]
pub struct InstructionMix {
    pub integer_ops: u64,
    pub floating_point_ops: u64,
    pub vector_ops: u64,
    pub memory_ops: u64,
    pub branch_ops: u64,
    pub simd_utilization: f64,
    pub parallel_efficiency: f64,
}

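/// A concrete optimization suggestion attached to a function profile.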
#[derive(Debug, Clone)]
pub struct OptimizationOpportunity {
    pub opportunity_type: OptimizationType,
    pub description: String,
    pub potential_speedup: f64,
    pub implementation_effort: ImplementationEffort,
    pub confidence: f64,
    pub code_location: String,
    pub suggested_actions: Vec<String>,
}

#[derive(Debug, Clone)]
pub struct DependencyChain {
    pub chain_id: String,
    pub length: u32,
    pub critical_path_time: Duration,
    pub parallelization_potential: f64,
}

#[derive(Debug, Clone, PartialEq)]
pub enum MemoryAccessType {
    Sequential,
    Random,
    Strided,
    Gather,
    Scatter,
}

#[derive(Debug, Clone)]
pub struct StridePattern {
    pub primary_stride: i64,
    pub secondary_stride: Option<i64>,
    pub regularity: f64,
    pub predictability: f64,
}

#[derive(Debug, Clone, PartialEq)]
pub enum BranchType {
    Conditional,
    Indirect,
    Return,
    Call,
    Loop,
}

#[derive(Debug, Clone, PartialEq)]
pub enum OptimizationType {
    Vectorization,
    LoopUnrolling,
    FunctionInlining,
    MemoryPrefetching,
    BranchElimination,
    CacheOptimization,
    Parallelization,
    AlgorithmicImprovement,
}

#[derive(Debug, Clone, PartialEq)]
pub enum ImplementationEffort {
    Low,
    Medium,
    High,
    VeryHigh,
}

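/// Profile-guided optimizer: stores collected performance profiles, matches
/// them against a set of `OptimizationRule`s, and records which optimizations
/// were applied.
///
/// A minimal usage sketch (doc-test ignored; adjust the imports to this
/// crate's module layout):
///
/// ```ignore
/// let optimizer = ProfileGuidedOptimizer::new(
///     ProfilerConfig::default(),
///     PerformanceTargets::default(),
/// );
/// optimizer.collect_profile("my_program").unwrap();
/// for rec in optimizer.analyze_and_recommend("my_program").unwrap() {
///     optimizer.apply_optimization(&rec).unwrap();
/// }
/// println!("cumulative gain: {:.2}x", optimizer.calculate_performance_gain());
/// ```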
pub struct ProfileGuidedOptimizer {
    profiles: Arc<RwLock<HashMap<String, PerformanceProfile>>>,
    optimization_rules: Vec<OptimizationRule>,
    #[allow(dead_code)]
    performance_targets: PerformanceTargets,
    profiler_config: ProfilerConfig,
    optimization_history: Arc<Mutex<Vec<OptimizationApplication>>>,
}

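/// A rewrite rule: when its `TriggerCondition` is met by a function profile,
/// the associated optimization is recommended.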
#[derive(Debug, Clone)]
pub struct OptimizationRule {
    pub name: String,
    pub trigger_condition: TriggerCondition,
    pub optimization_type: OptimizationType,
    pub implementation: String,
    pub expected_benefit: f64,
    pub risk_level: RiskLevel,
}

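/// Thresholds a profile must meet before a rule fires. Note that
/// `matches_trigger_condition` currently checks only the hotness,
/// call-frequency, and cache-miss-rate fields.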
#[derive(Debug, Clone)]
pub struct TriggerCondition {
    pub min_hotness_score: f64,
    pub min_call_frequency: u64,
    pub max_cache_miss_rate: f64,
    pub min_loop_iterations: u64,
    pub function_name_patterns: Vec<String>,
}

#[derive(Debug, Clone)]
pub struct PerformanceTargets {
    pub target_speedup: f64,
    pub max_memory_increase: f64,
    pub max_compilation_time: Duration,
    pub stability_requirement: f64,
}

#[derive(Debug, Clone)]
pub struct ProfilerConfig {
    pub sampling_frequency: u64,
    pub enable_instruction_profiling: bool,
    pub enable_memory_profiling: bool,
    pub enable_cache_profiling: bool,
    pub enable_branch_profiling: bool,
    pub profiling_duration: Duration,
}

#[derive(Debug, Clone)]
pub struct OptimizationApplication {
    pub timestamp: Instant,
    pub rule_name: String,
    pub target_function: String,
    pub optimization_type: OptimizationType,
    pub measured_speedup: Option<f64>,
    pub success: bool,
    pub notes: String,
}

#[derive(Debug, Clone, PartialEq)]
pub enum RiskLevel {
    Low,
    Medium,
    High,
    Experimental,
}

impl ProfileGuidedOptimizer {
    pub fn new(config: ProfilerConfig, targets: PerformanceTargets) -> Self {
        let mut optimizer = Self {
            profiles: Arc::new(RwLock::new(HashMap::new())),
            optimization_rules: Vec::new(),
            performance_targets: targets,
            profiler_config: config,
            optimization_history: Arc::new(Mutex::new(Vec::new())),
        };

        optimizer.initialize_default_rules();
        optimizer
    }

    fn initialize_default_rules(&mut self) {
        self.optimization_rules = vec![
            OptimizationRule {
                name: "Hot Function Inlining".to_string(),
                trigger_condition: TriggerCondition {
                    min_hotness_score: 0.8,
                    min_call_frequency: 1000,
                    max_cache_miss_rate: 1.0,
                    min_loop_iterations: 0,
                    function_name_patterns: vec![".*_hot.*".to_string()],
                },
                optimization_type: OptimizationType::FunctionInlining,
                implementation: "#[inline(always)]".to_string(),
                expected_benefit: 1.15,
                risk_level: RiskLevel::Low,
            },
            OptimizationRule {
                name: "Loop Vectorization".to_string(),
                trigger_condition: TriggerCondition {
                    min_hotness_score: 0.6,
                    min_call_frequency: 0,
                    max_cache_miss_rate: 1.0,
                    min_loop_iterations: 100,
                    function_name_patterns: vec![".*_vectorizable.*".to_string()],
                },
                optimization_type: OptimizationType::Vectorization,
                implementation: "SIMD optimization".to_string(),
                expected_benefit: 2.0,
                risk_level: RiskLevel::Medium,
            },
            OptimizationRule {
                name: "Memory Prefetching".to_string(),
                trigger_condition: TriggerCondition {
                    min_hotness_score: 0.5,
                    min_call_frequency: 0,
                    max_cache_miss_rate: 0.1,
                    min_loop_iterations: 0,
                    function_name_patterns: vec![".*_memory_intensive.*".to_string()],
                },
                optimization_type: OptimizationType::MemoryPrefetching,
                implementation: "Software prefetching".to_string(),
                expected_benefit: 1.3,
                risk_level: RiskLevel::Medium,
            },
            OptimizationRule {
                name: "Loop Unrolling".to_string(),
                trigger_condition: TriggerCondition {
                    min_hotness_score: 0.7,
                    min_call_frequency: 0,
                    max_cache_miss_rate: 1.0,
                    min_loop_iterations: 10,
                    function_name_patterns: vec![".*_tight_loop.*".to_string()],
                },
                optimization_type: OptimizationType::LoopUnrolling,
                implementation: "Unroll factor 4".to_string(),
                expected_benefit: 1.25,
                risk_level: RiskLevel::Low,
            },
        ];
    }

    pub fn add_optimization_rule(&mut self, rule: OptimizationRule) {
        self.optimization_rules.push(rule);
    }

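    /// Collects a performance profile for `program_name` and caches it.
    ///
    /// The profile is currently synthesized by the `generate_mock_*` helpers
    /// below rather than taken from a real sampling profiler.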
    pub fn collect_profile(&self, program_name: &str) -> Result<PerformanceProfile, ProfileError> {
        let mock_profile = PerformanceProfile {
            function_profiles: self.generate_mock_function_profiles(),
            loop_profiles: self.generate_mock_loop_profiles(),
            memory_access_patterns: self.generate_mock_memory_patterns(),
            branch_predictions: self.generate_mock_branch_profiles(),
            cache_statistics: CacheStatistics {
                l1_hit_rate: 0.95,
                l2_hit_rate: 0.85,
                l3_hit_rate: 0.70,
                tlb_hit_rate: 0.98,
                cache_line_utilization: 0.75,
                false_sharing_incidents: 5,
                prefetch_accuracy: 0.80,
            },
            instruction_mix: InstructionMix {
                integer_ops: 1_000_000,
                floating_point_ops: 500_000,
                vector_ops: 100_000,
                memory_ops: 200_000,
                branch_ops: 150_000,
                simd_utilization: 0.60,
                parallel_efficiency: 0.75,
            },
            profiling_duration: self.profiler_config.profiling_duration,
            total_samples: 1_000_000,
        };

        self.profiles
            .write()
            .unwrap()
            .insert(program_name.to_string(), mock_profile.clone());
        Ok(mock_profile)
    }

    fn generate_mock_function_profiles(&self) -> HashMap<String, FunctionProfile> {
        let mut profiles = HashMap::new();

        profiles.insert(
            "matrix_multiply".to_string(),
            FunctionProfile {
                name: "matrix_multiply".to_string(),
                total_time: Duration::from_millis(500),
                self_time: Duration::from_millis(450),
                call_count: 1000,
                avg_time_per_call: Duration::from_micros(500),
                max_time_per_call: Duration::from_millis(2),
                min_time_per_call: Duration::from_micros(100),
                cpu_cycles: 1_000_000,
                cache_misses: 5000,
                branch_misses: 100,
                hotness_score: 0.9,
                optimization_opportunities: vec![OptimizationOpportunity {
                    opportunity_type: OptimizationType::Vectorization,
                    description: "Loop can be vectorized for SIMD".to_string(),
                    potential_speedup: 2.5,
                    implementation_effort: ImplementationEffort::Medium,
                    confidence: 0.85,
                    code_location: "matrix_multiply.rs:45".to_string(),
                    suggested_actions: vec![
                        "Use SIMD intrinsics".to_string(),
                        "Enable auto-vectorization".to_string(),
                    ],
                }],
            },
        );

        profiles.insert(
            "activation_function".to_string(),
            FunctionProfile {
                name: "activation_function".to_string(),
                total_time: Duration::from_millis(200),
                self_time: Duration::from_millis(180),
                call_count: 10_000,
                avg_time_per_call: Duration::from_micros(20),
                max_time_per_call: Duration::from_micros(100),
                min_time_per_call: Duration::from_micros(5),
                cpu_cycles: 400_000,
                cache_misses: 1000,
                branch_misses: 50,
                hotness_score: 0.7,
                optimization_opportunities: vec![OptimizationOpportunity {
                    opportunity_type: OptimizationType::FunctionInlining,
                    description: "Small function called frequently".to_string(),
                    potential_speedup: 1.15,
                    implementation_effort: ImplementationEffort::Low,
                    confidence: 0.95,
                    code_location: "activation.rs:12".to_string(),
                    suggested_actions: vec!["Add inline attribute".to_string()],
                }],
            },
        );

        profiles
    }

    fn generate_mock_loop_profiles(&self) -> HashMap<String, LoopProfile> {
        let mut profiles = HashMap::new();

        profiles.insert(
            "training_loop".to_string(),
            LoopProfile {
                loop_id: "training_loop".to_string(),
                location: "train.rs:100".to_string(),
                iteration_count: 1000,
                total_time: Duration::from_millis(1000),
                avg_time_per_iteration: Duration::from_millis(1),
                vectorization_efficiency: 0.4,
                dependency_chains: vec![DependencyChain {
                    chain_id: "weight_update".to_string(),
                    length: 3,
                    critical_path_time: Duration::from_micros(100),
                    parallelization_potential: 0.8,
                }],
                memory_access_stride: 8,
                loop_carried_dependencies: 1,
                optimization_potential: 0.6,
            },
        );

        profiles
    }

    fn generate_mock_memory_patterns(&self) -> HashMap<String, MemoryAccessPattern> {
        let mut patterns = HashMap::new();

        patterns.insert(
            "data_loading".to_string(),
            MemoryAccessPattern {
                function_name: "data_loading".to_string(),
                access_type: MemoryAccessType::Sequential,
                access_frequency: 10_000,
                cache_hit_rate: 0.85,
                average_latency: Duration::from_nanos(50),
                stride_pattern: StridePattern {
                    primary_stride: 8,
                    secondary_stride: None,
                    regularity: 0.95,
                    predictability: 0.90,
                },
                prefetch_effectiveness: 0.75,
                numa_locality: 0.80,
            },
        );

        patterns
    }

    fn generate_mock_branch_profiles(&self) -> HashMap<String, BranchProfile> {
        let mut profiles = HashMap::new();

        profiles.insert(
            "convergence_check".to_string(),
            BranchProfile {
                branch_id: "convergence_check".to_string(),
                location: "optimizer.rs:200".to_string(),
                taken_count: 950,
                not_taken_count: 50,
                prediction_accuracy: 0.95,
                misprediction_penalty: Duration::from_nanos(20),
                branch_type: BranchType::Conditional,
            },
        );

        profiles
    }

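    /// Matches the cached profile for `program_name` against every registered
    /// rule and returns recommendations sorted by descending priority.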
    pub fn analyze_and_recommend(
        &self,
        program_name: &str,
    ) -> Result<Vec<OptimizationRecommendation>, ProfileError> {
        let profiles = self.profiles.read().unwrap();
        let profile = profiles
            .get(program_name)
            .ok_or(ProfileError::ProfileNotFound)?;

        let mut recommendations = Vec::new();

        for func_profile in profile.function_profiles.values() {
            for rule in &self.optimization_rules {
                if self.matches_trigger_condition(&rule.trigger_condition, func_profile) {
                    recommendations.push(OptimizationRecommendation {
                        rule_name: rule.name.clone(),
                        target_function: func_profile.name.clone(),
                        optimization_type: rule.optimization_type.clone(),
                        expected_speedup: rule.expected_benefit,
                        risk_level: rule.risk_level.clone(),
                        implementation: rule.implementation.clone(),
                        priority: self.calculate_priority(func_profile, rule),
                        estimated_effort: ImplementationEffort::Medium,
                        confidence: 0.8,
                    });
                }
            }
        }

        recommendations.sort_by(|a, b| {
            b.priority
                .partial_cmp(&a.priority)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        Ok(recommendations)
    }

    fn matches_trigger_condition(
        &self,
        condition: &TriggerCondition,
        profile: &FunctionProfile,
    ) -> bool {
        profile.hotness_score >= condition.min_hotness_score
            && profile.call_count >= condition.min_call_frequency
            && (profile.cache_misses as f64 / profile.call_count as f64)
                <= condition.max_cache_miss_rate
    }

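    /// Priority = hotness score x (expected benefit - 1.0) x a risk weight
    /// that discounts riskier rules.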
    fn calculate_priority(&self, profile: &FunctionProfile, rule: &OptimizationRule) -> f64 {
        let hotness_factor = profile.hotness_score;
        let benefit_factor = rule.expected_benefit - 1.0;
        let risk_factor = match rule.risk_level {
            RiskLevel::Low => 1.0,
            RiskLevel::Medium => 0.8,
            RiskLevel::High => 0.6,
            RiskLevel::Experimental => 0.4,
        };

        hotness_factor * benefit_factor * risk_factor
    }

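    /// Records a recommendation in the optimization history. The measured
    /// speedup is simulated as 90% of the expected speedup rather than
    /// re-profiled.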
    pub fn apply_optimization(
        &self,
        recommendation: &OptimizationRecommendation,
    ) -> Result<(), ProfileError> {
        let application = OptimizationApplication {
            timestamp: Instant::now(),
            rule_name: recommendation.rule_name.clone(),
            target_function: recommendation.target_function.clone(),
            optimization_type: recommendation.optimization_type.clone(),
            measured_speedup: Some(recommendation.expected_speedup * 0.9),
            success: true,
            notes: format!(
                "Applied {} to {}",
                recommendation.implementation, recommendation.target_function
            ),
        };

        self.optimization_history.lock().unwrap().push(application);
        Ok(())
    }

    pub fn get_optimization_history(&self) -> Vec<OptimizationApplication> {
        self.optimization_history.lock().unwrap().clone()
    }

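    /// Multiplies the measured speedups of all successful applications into a
    /// cumulative gain; returns 1.0 if nothing has been applied yet.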
    pub fn calculate_performance_gain(&self) -> f64 {
        let history = self.optimization_history.lock().unwrap();
        let successful_optimizations: Vec<_> = history
            .iter()
            .filter(|app| app.success && app.measured_speedup.is_some())
            .collect();

        if successful_optimizations.is_empty() {
            return 1.0;
        }

        successful_optimizations
            .iter()
            .map(|app| app.measured_speedup.unwrap())
            .fold(1.0, |acc, speedup| acc * speedup)
    }

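    /// Builds a full report for `program_name`: profile summary, current
    /// recommendations, applied optimizations, and aggregate gains.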
    pub fn generate_report(&self, program_name: &str) -> Result<OptimizationReport, ProfileError> {
        // Clone the profile and release the read guard before calling
        // analyze_and_recommend, which acquires the same RwLock again.
        let profile = {
            let profiles = self.profiles.read().unwrap();
            profiles
                .get(program_name)
                .ok_or(ProfileError::ProfileNotFound)?
                .clone()
        };

        let recommendations = self.analyze_and_recommend(program_name)?;
        let history = self.get_optimization_history();
        let performance_gain = self.calculate_performance_gain();

        let potential_further_gains = recommendations
            .iter()
            .map(|r| r.expected_speedup - 1.0)
            .sum::<f64>();

        Ok(OptimizationReport {
            program_name: program_name.to_string(),
            profile_summary: ProfileSummary {
                total_functions: profile.function_profiles.len(),
                hot_functions: profile
                    .function_profiles
                    .values()
                    .filter(|f| f.hotness_score > 0.5)
                    .count(),
                total_loops: profile.loop_profiles.len(),
                vectorizable_loops: profile
                    .loop_profiles
                    .values()
                    .filter(|l| l.vectorization_efficiency < 0.5)
                    .count(),
                cache_efficiency: profile.cache_statistics.l1_hit_rate,
                simd_utilization: profile.instruction_mix.simd_utilization,
            },
            recommendations,
            applied_optimizations: history,
            overall_performance_gain: performance_gain,
            potential_further_gains,
            report_timestamp: Instant::now(),
        })
    }

    pub fn reset(&self) {
        self.profiles.write().unwrap().clear();
        self.optimization_history.lock().unwrap().clear();
    }
}

#[derive(Debug, Clone)]
pub struct OptimizationRecommendation {
    pub rule_name: String,
    pub target_function: String,
    pub optimization_type: OptimizationType,
    pub expected_speedup: f64,
    pub risk_level: RiskLevel,
    pub implementation: String,
    pub priority: f64,
    pub estimated_effort: ImplementationEffort,
    pub confidence: f64,
}

#[derive(Debug, Clone)]
pub struct ProfileSummary {
    pub total_functions: usize,
    pub hot_functions: usize,
    pub total_loops: usize,
    pub vectorizable_loops: usize,
    pub cache_efficiency: f64,
    pub simd_utilization: f64,
}

#[derive(Debug, Clone)]
pub struct OptimizationReport {
    pub program_name: String,
    pub profile_summary: ProfileSummary,
    pub recommendations: Vec<OptimizationRecommendation>,
    pub applied_optimizations: Vec<OptimizationApplication>,
    pub overall_performance_gain: f64,
    pub potential_further_gains: f64,
    pub report_timestamp: Instant,
}

#[derive(Debug, thiserror::Error)]
pub enum ProfileError {
    #[error("Profile not found")]
    ProfileNotFound,
    #[error("Profiling failed: {0}")]
    ProfilingFailed(String),
    #[error("Optimization failed: {0}")]
    OptimizationFailed(String),
    #[error("Invalid configuration: {0}")]
    InvalidConfiguration(String),
}

impl Default for ProfilerConfig {
    fn default() -> Self {
        Self {
            sampling_frequency: 1000,
            enable_instruction_profiling: true,
            enable_memory_profiling: true,
            enable_cache_profiling: true,
            enable_branch_profiling: true,
            profiling_duration: Duration::from_secs(10),
        }
    }
}

impl Default for PerformanceTargets {
    fn default() -> Self {
        Self {
            target_speedup: 1.5,
            max_memory_increase: 0.1,
            max_compilation_time: Duration::from_secs(60),
            stability_requirement: 0.95,
        }
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_optimizer_creation() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        assert!(!optimizer.optimization_rules.is_empty());
    }

    #[test]
    fn test_profile_collection() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        let profile = optimizer.collect_profile("test_program").unwrap();
        assert!(!profile.function_profiles.is_empty());
        assert!(profile.total_samples > 0);
    }

    #[test]
    fn test_optimization_recommendations() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        optimizer.collect_profile("test_program").unwrap();
        let recommendations = optimizer.analyze_and_recommend("test_program").unwrap();

        assert!(!recommendations.is_empty());
        assert!(recommendations[0].priority > 0.0);
    }

    #[test]
    fn test_optimization_application() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        optimizer.collect_profile("test_program").unwrap();
        let recommendations = optimizer.analyze_and_recommend("test_program").unwrap();

        if let Some(recommendation) = recommendations.first() {
            assert!(optimizer.apply_optimization(recommendation).is_ok());

            let history = optimizer.get_optimization_history();
            assert!(!history.is_empty());
            assert!(history[0].success);
        }
    }

    #[test]
    fn test_performance_gain_calculation() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        optimizer.collect_profile("test_program").unwrap();
        let recommendations = optimizer.analyze_and_recommend("test_program").unwrap();

        for recommendation in recommendations.iter().take(2) {
            optimizer.apply_optimization(recommendation).unwrap();
        }

        let gain = optimizer.calculate_performance_gain();
        assert!(gain >= 1.0);
    }

    #[test]
    fn test_optimization_report() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        optimizer.collect_profile("test_program").unwrap();
        let report = optimizer.generate_report("test_program").unwrap();

        assert_eq!(report.program_name, "test_program");
        assert!(report.profile_summary.total_functions > 0);
        assert!(!report.recommendations.is_empty());
    }

    #[test]
    fn test_custom_optimization_rule() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let mut optimizer = ProfileGuidedOptimizer::new(config, targets);

        let custom_rule = OptimizationRule {
            name: "Custom Parallel".to_string(),
            trigger_condition: TriggerCondition {
                min_hotness_score: 0.9,
                min_call_frequency: 1000,
                max_cache_miss_rate: 0.05,
                min_loop_iterations: 1000,
                function_name_patterns: vec!["parallel_*".to_string()],
            },
            optimization_type: OptimizationType::Parallelization,
            implementation: "Use rayon parallel iterator".to_string(),
            expected_benefit: 3.0,
            risk_level: RiskLevel::Medium,
        };

        let initial_rules = optimizer.optimization_rules.len();
        optimizer.add_optimization_rule(custom_rule);
        assert_eq!(optimizer.optimization_rules.len(), initial_rules + 1);
    }

    #[test]
    fn test_trigger_condition_matching() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        let condition = TriggerCondition {
            min_hotness_score: 0.5,
            min_call_frequency: 100,
            max_cache_miss_rate: 0.1,
            min_loop_iterations: 0,
            function_name_patterns: vec![],
        };

        let profile = FunctionProfile {
            name: "test_function".to_string(),
            total_time: Duration::from_millis(100),
            self_time: Duration::from_millis(90),
            call_count: 1000,
            avg_time_per_call: Duration::from_micros(100),
            max_time_per_call: Duration::from_millis(1),
            min_time_per_call: Duration::from_micros(50),
            cpu_cycles: 200_000,
            cache_misses: 50,
            branch_misses: 10,
            hotness_score: 0.8,
            optimization_opportunities: vec![],
        };

        assert!(optimizer.matches_trigger_condition(&condition, &profile));
    }

    #[test]
    fn test_priority_calculation() {
        let config = ProfilerConfig::default();
        let targets = PerformanceTargets::default();
        let optimizer = ProfileGuidedOptimizer::new(config, targets);

        let profile = FunctionProfile {
            name: "test_function".to_string(),
            total_time: Duration::from_millis(100),
            self_time: Duration::from_millis(90),
            call_count: 1000,
            avg_time_per_call: Duration::from_micros(100),
            max_time_per_call: Duration::from_millis(1),
            min_time_per_call: Duration::from_micros(50),
            cpu_cycles: 200_000,
            cache_misses: 50,
            branch_misses: 10,
            hotness_score: 0.8,
            optimization_opportunities: vec![],
        };

        let rule = OptimizationRule {
            name: "Test Rule".to_string(),
            trigger_condition: TriggerCondition {
                min_hotness_score: 0.5,
                min_call_frequency: 100,
                max_cache_miss_rate: 0.1,
                min_loop_iterations: 0,
                function_name_patterns: vec![],
            },
            optimization_type: OptimizationType::FunctionInlining,
            implementation: "inline".to_string(),
            expected_benefit: 1.5,
            risk_level: RiskLevel::Low,
        };

        let priority = optimizer.calculate_priority(&profile, &rule);
        assert!(priority > 0.0);
    }
}