1use crate::error::{QuantRS2Error, QuantRS2Result};
14use crate::platform::PlatformCapabilities;
15use scirs2_core::Complex64;
16use std::collections::HashMap;
17use std::sync::{Arc, Mutex, RwLock};
18use std::time::{Duration, Instant};
19
/// Tunable switches and thresholds for the adaptive hardware optimizer.
#[derive(Debug, Clone)]
pub struct AdaptiveOptimizationConfig {
    /// Switch for workload profiling.
    // NOTE(review): not consulted by any code visible in this file — confirm where it is read.
    pub enable_workload_profiling: bool,
    /// Switch for memory optimization.
    // NOTE(review): not consulted by any code visible in this file — confirm where it is read.
    pub enable_memory_optimization: bool,
    /// Switch for power optimization.
    // NOTE(review): not consulted by any code visible in this file — confirm where it is read.
    pub enable_power_optimization: bool,
    /// Number of recorded samples a workload profile must reach before its
    /// best strategy may be replaced or handed out as a recommendation.
    pub min_samples_for_adaptation: usize,
    /// Fraction by which a sample must beat the previous average execution
    /// time before the strategy that produced it becomes the profile's best.
    pub variance_threshold: f64,
    /// Switch for runtime benchmarking.
    // NOTE(review): not consulted by any code visible in this file — confirm where it is read.
    pub enable_runtime_benchmarking: bool,
    /// Number of benchmark samples.
    // NOTE(review): not consulted by any code visible in this file — confirm where it is read.
    pub benchmark_samples: usize,
}
38
39impl Default for AdaptiveOptimizationConfig {
40 fn default() -> Self {
41 Self {
42 enable_workload_profiling: true,
43 enable_memory_optimization: true,
44 enable_power_optimization: false, min_samples_for_adaptation: 10,
46 variance_threshold: 0.2,
47 enable_runtime_benchmarking: true,
48 benchmark_samples: 5,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct WorkloadCharacteristics {
56 pub num_qubits: usize,
58 pub num_gates: usize,
60 pub circuit_depth: usize,
62 pub access_pattern: AccessPattern,
64 pub computational_intensity: f64,
66 pub expected_iterations: usize,
68}
69
/// Memory access pattern labels for a workload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPattern {
    /// Sequential access.
    Sequential,
    /// Strided access.
    Strided,
    /// Random access.
    Random,
    /// Mixed access.
    Mixed,
}
82
/// Optimization strategies the optimizer can select between.
///
/// The type is hashable so it can serve as a `HashMap` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationStrategy {
    /// Throughput-oriented strategy.
    Throughput,
    /// Latency-oriented strategy.
    Latency,
    /// Balanced strategy.
    Balanced,
    /// Strategy for memory-bound workloads.
    MemoryBound,
    /// Power-efficient strategy.
    PowerEfficient,
}
97
98#[derive(Debug, Clone)]
100pub struct PerformanceProfile {
101 pub avg_time: Duration,
103 pub std_dev: Duration,
105 pub min_time: Duration,
107 pub max_time: Duration,
109 pub sample_count: usize,
111 pub best_strategy: OptimizationStrategy,
113 pub memory_bandwidth_gbps: f64,
115 pub gflops: f64,
117}
118
119#[derive(Debug, Clone)]
121pub struct HardwareAssessment {
122 pub capabilities: PlatformCapabilities,
124 pub peak_memory_bandwidth: f64,
126 pub peak_gflops: f64,
128 pub optimal_batch_size: usize,
130 pub optimal_tile_size: usize,
132 pub max_efficient_state_size: usize,
134}
135
136impl HardwareAssessment {
137 pub fn from_capabilities(capabilities: PlatformCapabilities) -> Self {
139 let peak_memory_bandwidth = Self::estimate_memory_bandwidth(&capabilities);
141 let peak_gflops = Self::estimate_peak_gflops(&capabilities);
142 let optimal_batch_size = Self::compute_optimal_batch_size(&capabilities);
143 let optimal_tile_size = Self::compute_optimal_tile_size(&capabilities);
144 let max_efficient_state_size = Self::compute_max_efficient_state_size(&capabilities);
145
146 Self {
147 capabilities,
148 peak_memory_bandwidth,
149 peak_gflops,
150 optimal_batch_size,
151 optimal_tile_size,
152 max_efficient_state_size,
153 }
154 }
155
156 fn estimate_memory_bandwidth(capabilities: &PlatformCapabilities) -> f64 {
157 let cores = capabilities.cpu.logical_cores as f64;
159 let base_bandwidth: f64 = 25.6; (base_bandwidth * 2.0 * 0.8).min(cores * 10.0)
163 }
164
165 fn estimate_peak_gflops(capabilities: &PlatformCapabilities) -> f64 {
166 let cores = capabilities.cpu.logical_cores as f64;
167 let base_gflops_per_core = if capabilities.cpu.simd.avx512 {
168 100.0
169 } else if capabilities.cpu.simd.avx2 {
170 50.0
171 } else {
172 25.0
173 };
174 cores * base_gflops_per_core
175 }
176
177 fn compute_optimal_batch_size(capabilities: &PlatformCapabilities) -> usize {
178 let l3_cache = capabilities.cpu.cache.l3.unwrap_or(8 * 1024 * 1024);
179 let complex_size = std::mem::size_of::<Complex64>();
181 (l3_cache / (complex_size * 16)).max(32).min(1024)
182 }
183
184 fn compute_optimal_tile_size(capabilities: &PlatformCapabilities) -> usize {
185 let l2_cache = capabilities.cpu.cache.l2.unwrap_or(256 * 1024);
186 let complex_size = std::mem::size_of::<Complex64>();
188 let elements = l2_cache / (complex_size * 4); (elements as f64).sqrt() as usize
190 }
191
192 fn compute_max_efficient_state_size(capabilities: &PlatformCapabilities) -> usize {
193 let total_cache = capabilities.cpu.cache.l3.unwrap_or(8 * 1024 * 1024);
194 let cores = capabilities.cpu.logical_cores;
195 let complex_size = std::mem::size_of::<Complex64>();
197 (total_cache * cores) / (complex_size * 2)
198 }
199}
200
201pub struct AdaptiveHardwareOptimizer {
203 config: AdaptiveOptimizationConfig,
205 hardware: HardwareAssessment,
207 profiles: RwLock<HashMap<String, PerformanceProfile>>,
209 current_strategy: Mutex<OptimizationStrategy>,
211 history: RwLock<Vec<OptimizationEvent>>,
213}
214
215#[derive(Debug, Clone)]
217pub struct OptimizationEvent {
218 pub timestamp: Instant,
220 pub workload_key: String,
222 pub strategy: OptimizationStrategy,
224 pub execution_time: Duration,
226 pub was_optimal: bool,
228}
229
230impl AdaptiveHardwareOptimizer {
231 pub fn new(config: AdaptiveOptimizationConfig) -> Self {
233 let capabilities = PlatformCapabilities::detect();
234 let hardware = HardwareAssessment::from_capabilities(capabilities);
235
236 Self {
237 config,
238 hardware,
239 profiles: RwLock::new(HashMap::new()),
240 current_strategy: Mutex::new(OptimizationStrategy::Balanced),
241 history: RwLock::new(Vec::new()),
242 }
243 }
244
245 pub fn hardware_assessment(&self) -> &HardwareAssessment {
247 &self.hardware
248 }
249
250 pub fn analyze_workload(
252 &self,
253 characteristics: &WorkloadCharacteristics,
254 ) -> OptimizationStrategy {
255 let state_size = 1 << characteristics.num_qubits;
257 let total_operations = characteristics.num_gates * state_size;
258 let memory_access =
259 state_size * characteristics.circuit_depth * std::mem::size_of::<Complex64>();
260
261 let intensity = characteristics.computational_intensity;
263
264 if intensity > 10.0 {
265 OptimizationStrategy::Throughput
267 } else if intensity < 1.0 {
268 OptimizationStrategy::MemoryBound
270 } else if characteristics.expected_iterations > 100 {
271 OptimizationStrategy::Throughput
273 } else if state_size < self.hardware.optimal_batch_size {
274 OptimizationStrategy::Latency
276 } else {
277 OptimizationStrategy::Balanced
279 }
280 }
281
282 pub fn get_optimization_params(
284 &self,
285 strategy: OptimizationStrategy,
286 num_qubits: usize,
287 ) -> OptimizationParams {
288 let state_size = 1 << num_qubits;
289
290 match strategy {
291 OptimizationStrategy::Throughput => OptimizationParams {
292 use_simd: true,
293 use_parallel: state_size > 1024,
294 batch_size: self.hardware.optimal_batch_size,
295 tile_size: self.hardware.optimal_tile_size,
296 prefetch_distance: 8,
297 use_streaming: state_size > self.hardware.max_efficient_state_size,
298 },
299 OptimizationStrategy::Latency => OptimizationParams {
300 use_simd: true,
301 use_parallel: false, batch_size: 1,
303 tile_size: 64,
304 prefetch_distance: 4,
305 use_streaming: false,
306 },
307 OptimizationStrategy::Balanced => OptimizationParams {
308 use_simd: true,
309 use_parallel: state_size > 2048,
310 batch_size: (self.hardware.optimal_batch_size / 2).max(32),
311 tile_size: self.hardware.optimal_tile_size,
312 prefetch_distance: 6,
313 use_streaming: state_size > self.hardware.max_efficient_state_size * 2,
314 },
315 OptimizationStrategy::MemoryBound => OptimizationParams {
316 use_simd: true,
317 use_parallel: true, batch_size: self.hardware.optimal_batch_size * 2,
319 tile_size: self.hardware.optimal_tile_size / 2, prefetch_distance: 16, use_streaming: true,
322 },
323 OptimizationStrategy::PowerEfficient => OptimizationParams {
324 use_simd: false, use_parallel: false,
326 batch_size: 32,
327 tile_size: 32,
328 prefetch_distance: 4,
329 use_streaming: false,
330 },
331 }
332 }
333
334 pub fn record_execution(
336 &self,
337 workload_key: &str,
338 strategy: OptimizationStrategy,
339 execution_time: Duration,
340 ) {
341 if let Ok(mut profiles) = self.profiles.write() {
343 let profile = profiles
344 .entry(workload_key.to_string())
345 .or_insert(PerformanceProfile {
346 avg_time: execution_time,
347 std_dev: Duration::ZERO,
348 min_time: execution_time,
349 max_time: execution_time,
350 sample_count: 0,
351 best_strategy: strategy,
352 memory_bandwidth_gbps: 0.0,
353 gflops: 0.0,
354 });
355
356 let n = profile.sample_count as f64;
358 let new_time = execution_time.as_secs_f64();
359 let old_avg = profile.avg_time.as_secs_f64();
360
361 let new_avg = old_avg + (new_time - old_avg) / (n + 1.0);
362 profile.avg_time = Duration::from_secs_f64(new_avg);
363
364 if execution_time < profile.min_time {
365 profile.min_time = execution_time;
366 }
367 if execution_time > profile.max_time {
368 profile.max_time = execution_time;
369 }
370
371 profile.sample_count += 1;
372
373 if profile.sample_count >= self.config.min_samples_for_adaptation {
375 if execution_time.as_secs_f64() < old_avg * (1.0 - self.config.variance_threshold) {
377 profile.best_strategy = strategy;
378 }
379 }
380 }
381
382 if let Ok(mut history) = self.history.write() {
384 history.push(OptimizationEvent {
385 timestamp: Instant::now(),
386 workload_key: workload_key.to_string(),
387 strategy,
388 execution_time,
389 was_optimal: true, });
391
392 if history.len() > 10000 {
394 history.drain(0..1000);
395 }
396 }
397 }
398
399 pub fn get_recommended_strategy(&self, workload_key: &str) -> OptimizationStrategy {
401 if let Ok(profiles) = self.profiles.read() {
402 if let Some(profile) = profiles.get(workload_key) {
403 if profile.sample_count >= self.config.min_samples_for_adaptation {
404 return profile.best_strategy;
405 }
406 }
407 }
408
409 *self.current_strategy.lock().unwrap()
411 }
412
413 pub fn get_profile(&self, workload_key: &str) -> Option<PerformanceProfile> {
415 self.profiles.read().ok()?.get(workload_key).cloned()
416 }
417
418 pub fn generate_report(&self) -> OptimizationReport {
420 let profiles: Vec<_> = self
421 .profiles
422 .read()
423 .map(|p| p.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
424 .unwrap_or_default();
425
426 let total_events = self.history.read().map(|h| h.len()).unwrap_or(0);
427
428 OptimizationReport {
429 hardware_assessment: self.hardware.clone(),
430 workload_profiles: profiles,
431 total_optimization_events: total_events,
432 recommendations: self.generate_recommendations(),
433 }
434 }
435
436 fn generate_recommendations(&self) -> Vec<String> {
438 let mut recommendations = Vec::new();
439
440 if let Ok(profiles) = self.profiles.read() {
442 let mut memory_bound_count = 0;
443 let mut compute_bound_count = 0;
444
445 for (_key, profile) in profiles.iter() {
446 if profile.best_strategy == OptimizationStrategy::MemoryBound {
447 memory_bound_count += 1;
448 } else if profile.best_strategy == OptimizationStrategy::Throughput {
449 compute_bound_count += 1;
450 }
451 }
452
453 if memory_bound_count > compute_bound_count * 2 {
454 recommendations.push(
455 "Most workloads are memory-bound. Consider using larger tiles and aggressive prefetching".to_string()
456 );
457 }
458
459 if compute_bound_count > memory_bound_count * 2 {
460 recommendations.push(
461 "Most workloads are compute-bound. Consider enabling SIMD and parallel execution".to_string()
462 );
463 }
464 }
465
466 if self.hardware.capabilities.cpu.simd.avx512 {
468 recommendations.push(
469 "AVX-512 detected. Ensure alignment to 64 bytes for optimal performance"
470 .to_string(),
471 );
472 } else if self.hardware.capabilities.cpu.simd.avx2 {
473 recommendations.push(
474 "AVX2 detected. Ensure alignment to 32 bytes for optimal performance".to_string(),
475 );
476 }
477
478 if recommendations.is_empty() {
479 recommendations.push("System is operating efficiently".to_string());
480 }
481
482 recommendations
483 }
484
485 pub fn calibrate(&self, num_qubits: usize) -> CalibrationResult {
487 let state_size = 1 << num_qubits;
488 let mut results = HashMap::new();
489
490 for strategy in [
492 OptimizationStrategy::Throughput,
493 OptimizationStrategy::Latency,
494 OptimizationStrategy::Balanced,
495 OptimizationStrategy::MemoryBound,
496 ] {
497 let params = self.get_optimization_params(strategy, num_qubits);
498
499 let estimated_time = self.estimate_execution_time(state_size, ¶ms);
501 results.insert(strategy, estimated_time);
502 }
503
504 let best_strategy = results
506 .iter()
507 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap())
508 .map(|(s, _)| *s)
509 .unwrap_or(OptimizationStrategy::Balanced);
510
511 CalibrationResult {
512 best_strategy,
513 strategy_times: results,
514 optimal_params: self.get_optimization_params(best_strategy, num_qubits),
515 }
516 }
517
518 fn estimate_execution_time(&self, state_size: usize, params: &OptimizationParams) -> Duration {
519 let base_ops = state_size as f64;
521 let simd_factor = if params.use_simd { 4.0 } else { 1.0 };
522 let parallel_factor = if params.use_parallel {
523 self.hardware.capabilities.cpu.logical_cores as f64
524 } else {
525 1.0
526 };
527
528 let ops_per_sec = self.hardware.peak_gflops * 1e9;
529 let estimated_secs = (base_ops * 10.0) / (ops_per_sec * simd_factor * parallel_factor);
530
531 Duration::from_secs_f64(estimated_secs)
532 }
533}
534
/// Execution parameters produced for a chosen optimization strategy.
#[derive(Debug, Clone)]
pub struct OptimizationParams {
    /// Whether SIMD execution is enabled.
    pub use_simd: bool,
    /// Whether parallel execution is enabled.
    pub use_parallel: bool,
    /// Batch size to use.
    pub batch_size: usize,
    /// Tile size to use.
    pub tile_size: usize,
    /// Prefetch distance to use.
    pub prefetch_distance: usize,
    /// Whether streaming is enabled.
    pub use_streaming: bool,
}
551
552#[derive(Debug, Clone)]
554pub struct CalibrationResult {
555 pub best_strategy: OptimizationStrategy,
557 pub strategy_times: HashMap<OptimizationStrategy, Duration>,
559 pub optimal_params: OptimizationParams,
561}
562
563#[derive(Debug, Clone)]
565pub struct OptimizationReport {
566 pub hardware_assessment: HardwareAssessment,
568 pub workload_profiles: Vec<(String, PerformanceProfile)>,
570 pub total_optimization_events: usize,
572 pub recommendations: Vec<String>,
574}
575
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an optimizer from the default configuration.
    fn default_optimizer() -> AdaptiveHardwareOptimizer {
        AdaptiveHardwareOptimizer::new(AdaptiveOptimizationConfig::default())
    }

    /// The default configuration enables profiling and memory optimization
    /// and leaves power optimization off.
    #[test]
    fn test_config_default() {
        let defaults = AdaptiveOptimizationConfig::default();
        assert!(defaults.enable_workload_profiling);
        assert!(defaults.enable_memory_optimization);
        assert!(!defaults.enable_power_optimization);
    }

    /// Every estimate derived from the detected platform is positive.
    #[test]
    fn test_hardware_assessment() {
        let assessment = HardwareAssessment::from_capabilities(PlatformCapabilities::detect());

        assert!(assessment.peak_memory_bandwidth > 0.0);
        assert!(assessment.peak_gflops > 0.0);
        assert!(assessment.optimal_batch_size > 0);
        assert!(assessment.optimal_tile_size > 0);
    }

    /// A freshly built optimizer exposes a usable hardware assessment.
    #[test]
    fn test_optimizer_creation() {
        let optimizer = default_optimizer();

        assert!(optimizer.hardware_assessment().peak_gflops > 0.0);
    }

    /// High intensity selects `Throughput`; low intensity selects `MemoryBound`.
    #[test]
    fn test_workload_analysis() {
        let optimizer = default_optimizer();

        let high_intensity = WorkloadCharacteristics {
            num_qubits: 4,
            num_gates: 100,
            circuit_depth: 10,
            access_pattern: AccessPattern::Sequential,
            computational_intensity: 15.0,
            expected_iterations: 1,
        };
        assert_eq!(
            optimizer.analyze_workload(&high_intensity),
            OptimizationStrategy::Throughput
        );

        let low_intensity = WorkloadCharacteristics {
            num_qubits: 20,
            num_gates: 10,
            circuit_depth: 2,
            access_pattern: AccessPattern::Random,
            computational_intensity: 0.5,
            expected_iterations: 1,
        };
        assert_eq!(
            optimizer.analyze_workload(&low_intensity),
            OptimizationStrategy::MemoryBound
        );
    }

    /// Throughput parameters use SIMD with a non-zero batch; latency
    /// parameters never run in parallel.
    #[test]
    fn test_optimization_params() {
        let optimizer = default_optimizer();

        let throughput = optimizer.get_optimization_params(OptimizationStrategy::Throughput, 10);
        assert!(throughput.use_simd);
        assert!(throughput.batch_size > 0);

        let latency = optimizer.get_optimization_params(OptimizationStrategy::Latency, 10);
        assert!(!latency.use_parallel);
    }

    /// Twenty recorded executions produce a profile with twenty samples.
    #[test]
    fn test_execution_recording() {
        let optimizer = default_optimizer();

        (0..20).for_each(|_| {
            optimizer.record_execution(
                "test_workload",
                OptimizationStrategy::Throughput,
                Duration::from_micros(100),
            )
        });

        let profile = optimizer.get_profile("test_workload");
        assert!(profile.is_some());
        assert_eq!(profile.unwrap().sample_count, 20);
    }

    /// Calibration yields at least one timing and a usable parameter set.
    #[test]
    fn test_calibration() {
        let optimizer = default_optimizer();

        let calibration = optimizer.calibrate(6);
        assert!(!calibration.strategy_times.is_empty());
        assert!(calibration.optimal_params.batch_size > 0);
    }

    /// A report always carries recommendations and a hardware assessment.
    #[test]
    fn test_optimization_report() {
        let optimizer = default_optimizer();

        let report = optimizer.generate_report();
        assert!(!report.recommendations.is_empty());
        assert!(report.hardware_assessment.peak_gflops > 0.0);
    }

    /// An unknown workload falls back to the `Balanced` strategy.
    #[test]
    fn test_recommended_strategy() {
        let optimizer = default_optimizer();

        let fallback = optimizer.get_recommended_strategy("unknown_workload");
        assert_eq!(fallback, OptimizationStrategy::Balanced);
    }
}