1use crate::error::{QuantRS2Error, QuantRS2Result};
14use crate::platform::PlatformCapabilities;
15use scirs2_core::Complex64;
16use std::collections::HashMap;
17use std::sync::{Arc, Mutex, RwLock};
18use std::time::{Duration, Instant};
19
/// Switches and thresholds that configure an [`AdaptiveHardwareOptimizer`].
///
/// Within this file only `min_samples_for_adaptation` and
/// `variance_threshold` are read by the optimizer; the other fields are
/// stored in the configuration but not consulted by any code shown here.
#[derive(Debug, Clone)]
pub struct AdaptiveOptimizationConfig {
    /// Toggle for workload profiling (not read by the code in this file).
    pub enable_workload_profiling: bool,
    /// Toggle for memory optimization (not read by the code in this file).
    pub enable_memory_optimization: bool,
    /// Toggle for power optimization (not read by the code in this file).
    pub enable_power_optimization: bool,
    /// Number of recorded samples a workload needs before
    /// `record_execution` may replace its best strategy and before
    /// `get_recommended_strategy` returns the learned strategy.
    pub min_samples_for_adaptation: usize,
    /// Fraction by which a sample must beat the running average before its
    /// strategy replaces the stored best one (`0.2` means at least 20% faster).
    pub variance_threshold: f64,
    /// Toggle for runtime benchmarking (not read by the code in this file).
    pub enable_runtime_benchmarking: bool,
    /// Sample count for benchmarking (not read by the code in this file).
    pub benchmark_samples: usize,
}
38
39impl Default for AdaptiveOptimizationConfig {
40 fn default() -> Self {
41 Self {
42 enable_workload_profiling: true,
43 enable_memory_optimization: true,
44 enable_power_optimization: false, min_samples_for_adaptation: 10,
46 variance_threshold: 0.2,
47 enable_runtime_benchmarking: true,
48 benchmark_samples: 5,
49 }
50 }
51}
52
53#[derive(Debug, Clone)]
55pub struct WorkloadCharacteristics {
56 pub num_qubits: usize,
58 pub num_gates: usize,
60 pub circuit_depth: usize,
62 pub access_pattern: AccessPattern,
64 pub computational_intensity: f64,
66 pub expected_iterations: usize,
68}
69
/// Memory access pattern label carried by [`WorkloadCharacteristics`].
///
/// No code in this file branches on the variant; the per-variant notes are
/// inferred from the names only — confirm against the consumers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessPattern {
    /// Presumably contiguous, in-order access.
    Sequential,
    /// Presumably access at a fixed stride.
    Strided,
    /// Presumably access with no predictable order.
    Random,
    /// Presumably a combination of the other patterns.
    Mixed,
}
82
/// Execution strategy chosen by the optimizer.
///
/// Each variant maps to one parameter set in
/// `AdaptiveHardwareOptimizer::get_optimization_params`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OptimizationStrategy {
    /// SIMD on, parallel for states above 1024 amplitudes, hardware-sized
    /// batches and tiles.
    Throughput,
    /// SIMD on, single-threaded, batch size 1.
    Latency,
    /// SIMD on, parallel for states above 2048 amplitudes, half-sized batches.
    Balanced,
    /// SIMD on, always parallel and streaming, doubled batches, halved tiles.
    MemoryBound,
    /// SIMD and parallelism off, small fixed batch and tile sizes.
    PowerEfficient,
}
97
98#[derive(Debug, Clone)]
100pub struct PerformanceProfile {
101 pub avg_time: Duration,
103 pub std_dev: Duration,
105 pub min_time: Duration,
107 pub max_time: Duration,
109 pub sample_count: usize,
111 pub best_strategy: OptimizationStrategy,
113 pub memory_bandwidth_gbps: f64,
115 pub gflops: f64,
117}
118
119#[derive(Debug, Clone)]
121pub struct HardwareAssessment {
122 pub capabilities: PlatformCapabilities,
124 pub peak_memory_bandwidth: f64,
126 pub peak_gflops: f64,
128 pub optimal_batch_size: usize,
130 pub optimal_tile_size: usize,
132 pub max_efficient_state_size: usize,
134}
135
136impl HardwareAssessment {
137 pub fn from_capabilities(capabilities: PlatformCapabilities) -> Self {
139 let peak_memory_bandwidth = Self::estimate_memory_bandwidth(&capabilities);
141 let peak_gflops = Self::estimate_peak_gflops(&capabilities);
142 let optimal_batch_size = Self::compute_optimal_batch_size(&capabilities);
143 let optimal_tile_size = Self::compute_optimal_tile_size(&capabilities);
144 let max_efficient_state_size = Self::compute_max_efficient_state_size(&capabilities);
145
146 Self {
147 capabilities,
148 peak_memory_bandwidth,
149 peak_gflops,
150 optimal_batch_size,
151 optimal_tile_size,
152 max_efficient_state_size,
153 }
154 }
155
156 fn estimate_memory_bandwidth(capabilities: &PlatformCapabilities) -> f64 {
157 let cores = capabilities.cpu.logical_cores as f64;
159 let base_bandwidth: f64 = 25.6; (base_bandwidth * 2.0 * 0.8).min(cores * 10.0)
163 }
164
165 fn estimate_peak_gflops(capabilities: &PlatformCapabilities) -> f64 {
166 let cores = capabilities.cpu.logical_cores as f64;
167 let base_gflops_per_core = if capabilities.cpu.simd.avx512 {
168 100.0
169 } else if capabilities.cpu.simd.avx2 {
170 50.0
171 } else {
172 25.0
173 };
174 cores * base_gflops_per_core
175 }
176
177 fn compute_optimal_batch_size(capabilities: &PlatformCapabilities) -> usize {
178 let l3_cache = capabilities.cpu.cache.l3.unwrap_or(8 * 1024 * 1024);
179 let complex_size = std::mem::size_of::<Complex64>();
181 (l3_cache / (complex_size * 16)).clamp(32, 1024)
182 }
183
184 fn compute_optimal_tile_size(capabilities: &PlatformCapabilities) -> usize {
185 let l2_cache = capabilities.cpu.cache.l2.unwrap_or(256 * 1024);
186 let complex_size = std::mem::size_of::<Complex64>();
188 let elements = l2_cache / (complex_size * 4); (elements as f64).sqrt() as usize
190 }
191
192 fn compute_max_efficient_state_size(capabilities: &PlatformCapabilities) -> usize {
193 let total_cache = capabilities.cpu.cache.l3.unwrap_or(8 * 1024 * 1024);
194 let cores = capabilities.cpu.logical_cores;
195 let complex_size = std::mem::size_of::<Complex64>();
197 (total_cache * cores) / (complex_size * 2)
198 }
199}
200
201pub struct AdaptiveHardwareOptimizer {
203 config: AdaptiveOptimizationConfig,
205 hardware: HardwareAssessment,
207 profiles: RwLock<HashMap<String, PerformanceProfile>>,
209 current_strategy: Mutex<OptimizationStrategy>,
211 history: RwLock<Vec<OptimizationEvent>>,
213}
214
215#[derive(Debug, Clone)]
217pub struct OptimizationEvent {
218 pub timestamp: Instant,
220 pub workload_key: String,
222 pub strategy: OptimizationStrategy,
224 pub execution_time: Duration,
226 pub was_optimal: bool,
228}
229
230impl AdaptiveHardwareOptimizer {
231 pub fn new(config: AdaptiveOptimizationConfig) -> Self {
233 let capabilities = PlatformCapabilities::detect();
234 let hardware = HardwareAssessment::from_capabilities(capabilities);
235
236 Self {
237 config,
238 hardware,
239 profiles: RwLock::new(HashMap::new()),
240 current_strategy: Mutex::new(OptimizationStrategy::Balanced),
241 history: RwLock::new(Vec::new()),
242 }
243 }
244
245 pub const fn hardware_assessment(&self) -> &HardwareAssessment {
247 &self.hardware
248 }
249
250 pub fn analyze_workload(
252 &self,
253 characteristics: &WorkloadCharacteristics,
254 ) -> OptimizationStrategy {
255 let state_size = 1 << characteristics.num_qubits;
257 let total_operations = characteristics.num_gates * state_size;
258 let memory_access =
259 state_size * characteristics.circuit_depth * std::mem::size_of::<Complex64>();
260
261 let intensity = characteristics.computational_intensity;
263
264 if intensity > 10.0 {
265 OptimizationStrategy::Throughput
267 } else if intensity < 1.0 {
268 OptimizationStrategy::MemoryBound
270 } else if characteristics.expected_iterations > 100 {
271 OptimizationStrategy::Throughput
273 } else if state_size < self.hardware.optimal_batch_size {
274 OptimizationStrategy::Latency
276 } else {
277 OptimizationStrategy::Balanced
279 }
280 }
281
282 pub fn get_optimization_params(
284 &self,
285 strategy: OptimizationStrategy,
286 num_qubits: usize,
287 ) -> OptimizationParams {
288 let state_size = 1 << num_qubits;
289
290 match strategy {
291 OptimizationStrategy::Throughput => OptimizationParams {
292 use_simd: true,
293 use_parallel: state_size > 1024,
294 batch_size: self.hardware.optimal_batch_size,
295 tile_size: self.hardware.optimal_tile_size,
296 prefetch_distance: 8,
297 use_streaming: state_size > self.hardware.max_efficient_state_size,
298 },
299 OptimizationStrategy::Latency => OptimizationParams {
300 use_simd: true,
301 use_parallel: false, batch_size: 1,
303 tile_size: 64,
304 prefetch_distance: 4,
305 use_streaming: false,
306 },
307 OptimizationStrategy::Balanced => OptimizationParams {
308 use_simd: true,
309 use_parallel: state_size > 2048,
310 batch_size: (self.hardware.optimal_batch_size / 2).max(32),
311 tile_size: self.hardware.optimal_tile_size,
312 prefetch_distance: 6,
313 use_streaming: state_size > self.hardware.max_efficient_state_size * 2,
314 },
315 OptimizationStrategy::MemoryBound => OptimizationParams {
316 use_simd: true,
317 use_parallel: true, batch_size: self.hardware.optimal_batch_size * 2,
319 tile_size: self.hardware.optimal_tile_size / 2, prefetch_distance: 16, use_streaming: true,
322 },
323 OptimizationStrategy::PowerEfficient => OptimizationParams {
324 use_simd: false, use_parallel: false,
326 batch_size: 32,
327 tile_size: 32,
328 prefetch_distance: 4,
329 use_streaming: false,
330 },
331 }
332 }
333
334 pub fn record_execution(
336 &self,
337 workload_key: &str,
338 strategy: OptimizationStrategy,
339 execution_time: Duration,
340 ) {
341 if let Ok(mut profiles) = self.profiles.write() {
343 let profile = profiles
344 .entry(workload_key.to_string())
345 .or_insert(PerformanceProfile {
346 avg_time: execution_time,
347 std_dev: Duration::ZERO,
348 min_time: execution_time,
349 max_time: execution_time,
350 sample_count: 0,
351 best_strategy: strategy,
352 memory_bandwidth_gbps: 0.0,
353 gflops: 0.0,
354 });
355
356 let n = profile.sample_count as f64;
358 let new_time = execution_time.as_secs_f64();
359 let old_avg = profile.avg_time.as_secs_f64();
360
361 let new_avg = old_avg + (new_time - old_avg) / (n + 1.0);
362 profile.avg_time = Duration::from_secs_f64(new_avg);
363
364 if execution_time < profile.min_time {
365 profile.min_time = execution_time;
366 }
367 if execution_time > profile.max_time {
368 profile.max_time = execution_time;
369 }
370
371 profile.sample_count += 1;
372
373 if profile.sample_count >= self.config.min_samples_for_adaptation {
375 if execution_time.as_secs_f64() < old_avg * (1.0 - self.config.variance_threshold) {
377 profile.best_strategy = strategy;
378 }
379 }
380 }
381
382 if let Ok(mut history) = self.history.write() {
384 history.push(OptimizationEvent {
385 timestamp: Instant::now(),
386 workload_key: workload_key.to_string(),
387 strategy,
388 execution_time,
389 was_optimal: true, });
391
392 if history.len() > 10000 {
394 history.drain(0..1000);
395 }
396 }
397 }
398
399 pub fn get_recommended_strategy(&self, workload_key: &str) -> OptimizationStrategy {
401 if let Ok(profiles) = self.profiles.read() {
402 if let Some(profile) = profiles.get(workload_key) {
403 if profile.sample_count >= self.config.min_samples_for_adaptation {
404 return profile.best_strategy;
405 }
406 }
407 }
408
409 *self
411 .current_strategy
412 .lock()
413 .unwrap_or_else(|e| e.into_inner())
414 }
415
416 pub fn get_profile(&self, workload_key: &str) -> Option<PerformanceProfile> {
418 self.profiles.read().ok()?.get(workload_key).cloned()
419 }
420
421 pub fn generate_report(&self) -> OptimizationReport {
423 let profiles: Vec<_> = self
424 .profiles
425 .read()
426 .map(|p| p.iter().map(|(k, v)| (k.clone(), v.clone())).collect())
427 .unwrap_or_default();
428
429 let total_events = self.history.read().map(|h| h.len()).unwrap_or(0);
430
431 OptimizationReport {
432 hardware_assessment: self.hardware.clone(),
433 workload_profiles: profiles,
434 total_optimization_events: total_events,
435 recommendations: self.generate_recommendations(),
436 }
437 }
438
439 fn generate_recommendations(&self) -> Vec<String> {
441 let mut recommendations = Vec::new();
442
443 if let Ok(profiles) = self.profiles.read() {
445 let mut memory_bound_count = 0;
446 let mut compute_bound_count = 0;
447
448 for (_key, profile) in profiles.iter() {
449 if profile.best_strategy == OptimizationStrategy::MemoryBound {
450 memory_bound_count += 1;
451 } else if profile.best_strategy == OptimizationStrategy::Throughput {
452 compute_bound_count += 1;
453 }
454 }
455
456 if memory_bound_count > compute_bound_count * 2 {
457 recommendations.push(
458 "Most workloads are memory-bound. Consider using larger tiles and aggressive prefetching".to_string()
459 );
460 }
461
462 if compute_bound_count > memory_bound_count * 2 {
463 recommendations.push(
464 "Most workloads are compute-bound. Consider enabling SIMD and parallel execution".to_string()
465 );
466 }
467 }
468
469 if self.hardware.capabilities.cpu.simd.avx512 {
471 recommendations.push(
472 "AVX-512 detected. Ensure alignment to 64 bytes for optimal performance"
473 .to_string(),
474 );
475 } else if self.hardware.capabilities.cpu.simd.avx2 {
476 recommendations.push(
477 "AVX2 detected. Ensure alignment to 32 bytes for optimal performance".to_string(),
478 );
479 }
480
481 if recommendations.is_empty() {
482 recommendations.push("System is operating efficiently".to_string());
483 }
484
485 recommendations
486 }
487
488 pub fn calibrate(&self, num_qubits: usize) -> CalibrationResult {
490 let state_size = 1 << num_qubits;
491 let mut results = HashMap::new();
492
493 for strategy in [
495 OptimizationStrategy::Throughput,
496 OptimizationStrategy::Latency,
497 OptimizationStrategy::Balanced,
498 OptimizationStrategy::MemoryBound,
499 ] {
500 let params = self.get_optimization_params(strategy, num_qubits);
501
502 let estimated_time = self.estimate_execution_time(state_size, ¶ms);
504 results.insert(strategy, estimated_time);
505 }
506
507 let best_strategy = results
509 .iter()
510 .min_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
511 .map_or(OptimizationStrategy::Balanced, |(s, _)| *s);
512
513 CalibrationResult {
514 best_strategy,
515 strategy_times: results,
516 optimal_params: self.get_optimization_params(best_strategy, num_qubits),
517 }
518 }
519
520 fn estimate_execution_time(&self, state_size: usize, params: &OptimizationParams) -> Duration {
521 let base_ops = state_size as f64;
523 let simd_factor = if params.use_simd { 4.0 } else { 1.0 };
524 let parallel_factor = if params.use_parallel {
525 self.hardware.capabilities.cpu.logical_cores as f64
526 } else {
527 1.0
528 };
529
530 let ops_per_sec = self.hardware.peak_gflops * 1e9;
531 let estimated_secs = (base_ops * 10.0) / (ops_per_sec * simd_factor * parallel_factor);
532
533 Duration::from_secs_f64(estimated_secs)
534 }
535}
536
/// Tuning parameters produced by
/// `AdaptiveHardwareOptimizer::get_optimization_params` for one strategy.
#[derive(Debug, Clone)]
pub struct OptimizationParams {
    /// Whether SIMD execution is requested.
    pub use_simd: bool,
    /// Whether parallel execution is requested.
    pub use_parallel: bool,
    /// Batch size to process at once.
    pub batch_size: usize,
    /// Tile size to use.
    pub tile_size: usize,
    /// Prefetch distance (unit not established by this file).
    pub prefetch_distance: usize,
    /// Whether streaming is requested.
    pub use_streaming: bool,
}
553
554#[derive(Debug, Clone)]
556pub struct CalibrationResult {
557 pub best_strategy: OptimizationStrategy,
559 pub strategy_times: HashMap<OptimizationStrategy, Duration>,
561 pub optimal_params: OptimizationParams,
563}
564
565#[derive(Debug, Clone)]
567pub struct OptimizationReport {
568 pub hardware_assessment: HardwareAssessment,
570 pub workload_profiles: Vec<(String, PerformanceProfile)>,
572 pub total_optimization_events: usize,
574 pub recommendations: Vec<String>,
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an optimizer with the default configuration.
    fn default_optimizer() -> AdaptiveHardwareOptimizer {
        AdaptiveHardwareOptimizer::new(AdaptiveOptimizationConfig::default())
    }

    /// The default configuration enables profiling and memory optimization
    /// and leaves power optimization off.
    #[test]
    fn test_config_default() {
        let defaults = AdaptiveOptimizationConfig::default();
        assert!(defaults.enable_workload_profiling);
        assert!(defaults.enable_memory_optimization);
        assert!(!defaults.enable_power_optimization);
    }

    /// Every derived hardware figure is positive on the detected platform.
    #[test]
    fn test_hardware_assessment() {
        let assessment = HardwareAssessment::from_capabilities(PlatformCapabilities::detect());

        assert!(assessment.peak_memory_bandwidth > 0.0);
        assert!(assessment.peak_gflops > 0.0);
        assert!(assessment.optimal_batch_size > 0);
        assert!(assessment.optimal_tile_size > 0);
    }

    /// A freshly built optimizer exposes a usable hardware assessment.
    #[test]
    fn test_optimizer_creation() {
        let optimizer = default_optimizer();

        assert!(optimizer.hardware_assessment().peak_gflops > 0.0);
    }

    /// High intensity maps to `Throughput`, low intensity to `MemoryBound`.
    #[test]
    fn test_workload_analysis() {
        let optimizer = default_optimizer();

        let compute_bound = WorkloadCharacteristics {
            num_qubits: 4,
            num_gates: 100,
            circuit_depth: 10,
            access_pattern: AccessPattern::Sequential,
            computational_intensity: 15.0,
            expected_iterations: 1,
        };
        assert_eq!(
            optimizer.analyze_workload(&compute_bound),
            OptimizationStrategy::Throughput
        );

        let memory_bound = WorkloadCharacteristics {
            num_qubits: 20,
            num_gates: 10,
            circuit_depth: 2,
            access_pattern: AccessPattern::Random,
            computational_intensity: 0.5,
            expected_iterations: 1,
        };
        assert_eq!(
            optimizer.analyze_workload(&memory_bound),
            OptimizationStrategy::MemoryBound
        );
    }

    /// `Throughput` uses SIMD with a positive batch size; `Latency` stays
    /// single-threaded.
    #[test]
    fn test_optimization_params() {
        let optimizer = default_optimizer();

        let throughput = optimizer.get_optimization_params(OptimizationStrategy::Throughput, 10);
        assert!(throughput.use_simd);
        assert!(throughput.batch_size > 0);

        let latency = optimizer.get_optimization_params(OptimizationStrategy::Latency, 10);
        assert!(!latency.use_parallel);
    }

    /// Twenty recorded executions produce a profile with twenty samples.
    #[test]
    fn test_execution_recording() {
        let optimizer = default_optimizer();

        (0..20).for_each(|_| {
            optimizer.record_execution(
                "test_workload",
                OptimizationStrategy::Throughput,
                Duration::from_micros(100),
            );
        });

        let profile = optimizer.get_profile("test_workload");
        assert!(profile.is_some());
        assert_eq!(profile.expect("profile should exist").sample_count, 20);
    }

    /// Calibration yields per-strategy estimates and usable parameters.
    #[test]
    fn test_calibration() {
        let optimizer = default_optimizer();

        let calibration = optimizer.calibrate(6);
        assert!(!calibration.strategy_times.is_empty());
        assert!(calibration.optimal_params.batch_size > 0);
    }

    /// A report always carries recommendations and a hardware assessment.
    #[test]
    fn test_optimization_report() {
        let optimizer = default_optimizer();

        let report = optimizer.generate_report();
        assert!(!report.recommendations.is_empty());
        assert!(report.hardware_assessment.peak_gflops > 0.0);
    }

    /// Unknown workloads fall back to the `Balanced` strategy.
    #[test]
    fn test_recommended_strategy() {
        let optimizer = default_optimizer();

        let fallback = optimizer.get_recommended_strategy("unknown_workload");
        assert_eq!(fallback, OptimizationStrategy::Balanced);
    }
}