quantrs2_sim/
performance_benchmark.rs

1//! Comprehensive performance benchmarking suite for quantum simulation
2//!
3//! This module provides advanced benchmarking capabilities to measure and analyze
4//! the performance of various quantum simulation components, including optimizations,
5//! memory efficiency, and scalability analysis.
6
7use scirs2_core::parallel_ops::{IndexedParallelIterator, ParallelIterator};
8use scirs2_core::Complex64;
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::time::{Duration, Instant};
12
13use quantrs2_circuit::builder::{Circuit, Simulator};
14use quantrs2_core::{error::QuantRS2Result, platform::PlatformCapabilities, qubit::QubitId};
15
16use crate::circuit_optimization::{CircuitOptimizer, OptimizationConfig};
17use crate::optimized_simd;
18use crate::statevector::StateVectorSimulator;
19
/// Comprehensive benchmarking framework.
///
/// Holds the benchmark configuration, accumulates one [`BenchmarkResult`] per
/// completed measurement, and keeps the system description that `new` gathers
/// when the suite is constructed.
#[derive(Debug)]
pub struct QuantumBenchmarkSuite {
    /// Benchmark configuration (iteration counts, qubit range, feature toggles)
    config: BenchmarkConfig,
    /// Results storage; the public `benchmark_*` methods append to this list
    results: Vec<BenchmarkResult>,
    /// System information, captured once in `new` via `gather_system_info`
    system_info: SystemInfo,
}
30
/// Benchmark configuration parameters.
///
/// The [`Default`] implementation uses a `1..20` qubit range, 10 timed
/// iterations, 3 warmup iterations, a maximum depth of 50, and enables every
/// optional analysis.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
    /// Number of qubits to test (half-open range; the circuit-execution
    /// benchmark walks it in steps of 2 and stops above 16 qubits)
    pub qubit_range: std::ops::Range<usize>,
    /// Number of timed iterations per benchmark
    pub iterations: usize,
    /// Enable memory profiling
    // NOTE(review): not read by the methods visible in this part of the file — confirm usage.
    pub profile_memory: bool,
    /// Enable optimization comparison (gates `benchmark_optimization_comparison`
    /// inside `run_all_benchmarks`)
    pub compare_optimizations: bool,
    /// Enable scalability analysis (gates `benchmark_scalability` inside
    /// `run_all_benchmarks`)
    pub scalability_analysis: bool,
    /// Warmup iterations executed (untimed) before the timed iterations
    pub warmup_iterations: usize,
    /// Maximum circuit depth for tests
    // NOTE(review): not read by the methods visible in this part of the file — confirm usage.
    pub max_circuit_depth: usize,
}
49
/// Individual benchmark result.
///
/// One value is produced per (benchmark, qubit count) combination by the
/// private `benchmark_*` helper methods of the suite.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResult {
    /// Benchmark name, e.g. `"{gate}_{qubits}q"` or
    /// `"random_circuit_{qubits}q_{depth}d"`
    pub name: String,
    /// Number of qubits tested
    pub qubits: usize,
    /// Circuit depth (the helpers record a nominal depth, not a measured one)
    pub depth: usize,
    /// Execution time statistics over the timed iterations
    pub timing: TimingStats,
    /// Memory usage statistics
    pub memory: MemoryStats,
    /// Throughput metrics derived from the average execution time
    pub throughput: ThroughputStats,
    /// Human-readable description of the configuration used
    pub config_description: String,
}
68
/// Timing statistics, all expressed in nanoseconds.
///
/// Computed by `calculate_timing_stats` from the sorted list of per-iteration
/// durations.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimingStats {
    /// Average execution time (integer mean, truncated)
    pub average_ns: u128,
    /// Minimum execution time
    pub min_ns: u128,
    /// Maximum execution time
    pub max_ns: u128,
    /// Standard deviation (population form: squared deviations divided by the
    /// sample count)
    pub std_dev_ns: f64,
    /// 95th percentile: the sorted sample at index `floor(n * 0.95)`, clamped
    /// to the last sample
    pub p95_ns: u128,
    /// 99th percentile: the sorted sample at index `floor(n * 0.99)`, clamped
    /// to the last sample
    pub p99_ns: u128,
}
85
/// Memory usage statistics.
///
/// In the benchmark helpers visible in this file these figures are estimates:
/// the byte counts are derived from `(1 << qubits)` amplitudes and the scores
/// are fixed constants, not runtime measurements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryStats {
    /// Peak memory usage in bytes
    pub peak_memory_bytes: usize,
    /// Average memory usage in bytes
    pub average_memory_bytes: usize,
    /// Memory efficiency score (0-1)
    pub efficiency_score: f64,
    /// Buffer pool utilization
    pub buffer_pool_utilization: f64,
}
98
/// Throughput statistics.
///
/// Every rate is a nominal count divided by the average run time in seconds
/// (`average_ns / 1e9`), so a zero average yields a non-finite value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputStats {
    /// Gates per second (nominal gate count of the benchmark circuit)
    pub gates_per_second: f64,
    /// Qubits processed per second
    pub qubits_per_second: f64,
    /// Operations per second
    pub operations_per_second: f64,
    /// Simulation steps per second (one step per timed run)
    pub steps_per_second: f64,
}
111
/// System information for benchmark context.
///
/// Filled in by `gather_system_info` from `PlatformCapabilities::detect()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU information, formatted as `"{vendor} - {model_name}"`
    pub cpu_info: String,
    /// Total memory in GiB (byte count divided by 1024³)
    pub total_memory_gb: f64,
    /// Number of logical CPU cores
    pub cpu_cores: usize,
    /// Rust version
    pub rust_version: String,
    /// Compiler optimization level
    pub optimization_level: String,
    /// Names of the detected SIMD instruction sets (e.g. `"SSE2"`, `"AVX2"`,
    /// `"NEON"`)
    pub simd_support: Vec<String>,
}
128
/// Benchmark comparison result: relates one benchmark run to a baseline run.
// NOTE(review): this type is not constructed in the visible part of the file;
// confirm with the producer whether ratios above 1.0 mean "comparison is better".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkComparison {
    /// Baseline benchmark name
    pub baseline: String,
    /// Comparison benchmark name
    pub comparison: String,
    /// Performance improvement ratio
    pub improvement_ratio: f64,
    /// Memory efficiency improvement
    pub memory_improvement: f64,
    /// Throughput improvement
    pub throughput_improvement: f64,
    /// Scalability comparison
    pub scalability_factor: f64,
}
145
/// Scalability analysis result, as produced by `analyze_scalability`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityAnalysis {
    /// Run-time growth factor per additional qubit (mean of the per-qubit
    /// ratios between consecutive measurements)
    pub time_growth_factor: f64,
    /// Memory growth factor per additional qubit (the analysis reports a fixed
    /// 2.0 for state-vector simulation)
    pub memory_growth_factor: f64,
    /// Maximum practical qubit count (estimated against a 10 second budget)
    pub max_practical_qubits: usize,
    /// Efficiency plateau point (reported as `max_practical_qubits - 4`,
    /// saturating at zero)
    pub efficiency_plateau: usize,
    /// Complexity class estimate: `"Subexponential"`, `"Exponential"`, or
    /// `"Unknown"` when fewer than two measurements are available
    pub complexity_class: String,
}
160
161impl Default for BenchmarkConfig {
162    fn default() -> Self {
163        Self {
164            qubit_range: 1..20,
165            iterations: 10,
166            profile_memory: true,
167            compare_optimizations: true,
168            scalability_analysis: true,
169            warmup_iterations: 3,
170            max_circuit_depth: 50,
171        }
172    }
173}
174
175impl QuantumBenchmarkSuite {
176    /// Create a new benchmark suite
177    #[must_use]
178    pub fn new(config: BenchmarkConfig) -> Self {
179        Self {
180            config,
181            results: Vec::new(),
182            system_info: Self::gather_system_info(),
183        }
184    }
185
    /// Run the comprehensive benchmark suite from start to finish.
    ///
    /// Prints a banner and the system information, then runs the gate,
    /// circuit-execution and memory benchmarks, the optional optimization
    /// comparison and scalability analysis (each gated by its `config` flag),
    /// the SIMD and circuit-optimization benchmarks, and finally the report.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by any individual benchmark; later
    /// benchmarks and the final report are then skipped.
    pub fn run_all_benchmarks(&mut self) -> QuantRS2Result<()> {
        println!("🚀 Starting Comprehensive Quantum Simulation Benchmarks");
        println!("========================================================\n");

        // Print system information
        self.print_system_info();

        // Core simulation benchmarks (always run; results are appended to `self.results`)
        self.benchmark_basic_gates()?;
        self.benchmark_circuit_execution()?;
        self.benchmark_memory_efficiency()?;

        // Optional: compare optimizer configurations
        if self.config.compare_optimizations {
            self.benchmark_optimization_comparison()?;
        }

        // Optional: growth of run time with qubit count
        if self.config.scalability_analysis {
            self.benchmark_scalability()?;
        }

        // SIMD performance benchmarks
        self.benchmark_simd_performance()?;

        // Circuit optimization benchmarks
        self.benchmark_circuit_optimization()?;

        // Generate comprehensive report
        self.generate_final_report();

        Ok(())
    }
218
219    /// Benchmark basic gate operations
220    pub fn benchmark_basic_gates(&mut self) -> QuantRS2Result<()> {
221        println!("🔧 Benchmarking Basic Gate Operations");
222        println!("------------------------------------");
223
224        let gates = vec![
225            (
226                "Hadamard",
227                Box::new(|circuit: &mut Circuit<16>, q: usize| {
228                    circuit.h(QubitId::new(q as u32))?;
229                    Ok(())
230                }) as Box<dyn Fn(&mut Circuit<16>, usize) -> QuantRS2Result<()>>,
231            ),
232            (
233                "Pauli-X",
234                Box::new(|circuit: &mut Circuit<16>, q: usize| {
235                    circuit.x(QubitId::new(q as u32))?;
236                    Ok(())
237                }),
238            ),
239            (
240                "Pauli-Y",
241                Box::new(|circuit: &mut Circuit<16>, q: usize| {
242                    circuit.y(QubitId::new(q as u32))?;
243                    Ok(())
244                }),
245            ),
246            (
247                "Pauli-Z",
248                Box::new(|circuit: &mut Circuit<16>, q: usize| {
249                    circuit.z(QubitId::new(q as u32))?;
250                    Ok(())
251                }),
252            ),
253            (
254                "Phase-S",
255                Box::new(|circuit: &mut Circuit<16>, q: usize| {
256                    circuit.s(QubitId::new(q as u32))?;
257                    Ok(())
258                }),
259            ),
260            (
261                "T-Gate",
262                Box::new(|circuit: &mut Circuit<16>, q: usize| {
263                    circuit.t(QubitId::new(q as u32))?;
264                    Ok(())
265                }),
266            ),
267        ];
268
269        for (gate_name, gate_fn) in gates {
270            for qubits in [4, 8, 12, 16] {
271                let result = self.benchmark_gate_operation(gate_name, qubits, &gate_fn)?;
272                self.results.push(result);
273                println!(
274                    "  ✓ {} on {} qubits: {:.2}ms",
275                    gate_name,
276                    qubits,
277                    self.results
278                        .last()
279                        .expect("results should not be empty after push")
280                        .timing
281                        .average_ns as f64
282                        / 1_000_000.0
283                );
284            }
285        }
286
287        println!();
288        Ok(())
289    }
290
291    /// Benchmark circuit execution performance
292    pub fn benchmark_circuit_execution(&mut self) -> QuantRS2Result<()> {
293        println!("⚡ Benchmarking Circuit Execution");
294        println!("--------------------------------");
295
296        for qubits in self.config.qubit_range.clone().step_by(2) {
297            if qubits > 16 {
298                break;
299            } // Limit for demonstration
300
301            let result = self.benchmark_random_circuit(qubits, 20)?;
302            self.results.push(result);
303            println!(
304                "  ✓ Random circuit {} qubits: {:.2}ms",
305                qubits,
306                self.results
307                    .last()
308                    .expect("results should not be empty after push")
309                    .timing
310                    .average_ns as f64
311                    / 1_000_000.0
312            );
313        }
314
315        println!();
316        Ok(())
317    }
318
319    /// Benchmark memory efficiency
320    pub fn benchmark_memory_efficiency(&mut self) -> QuantRS2Result<()> {
321        println!("💾 Benchmarking Memory Efficiency");
322        println!("--------------------------------");
323
324        // Test different memory configurations
325        let configs = vec![
326            ("Standard", StateVectorSimulator::new()),
327            ("High-Performance", StateVectorSimulator::high_performance()),
328            ("Sequential", StateVectorSimulator::sequential()),
329        ];
330
331        for (config_name, simulator) in configs {
332            for qubits in [8, 12, 16] {
333                let result = self.benchmark_memory_usage(config_name, qubits, &simulator)?;
334                self.results.push(result);
335                println!(
336                    "  ✓ {} config {} qubits: {:.1}MB peak",
337                    config_name,
338                    qubits,
339                    self.results
340                        .last()
341                        .expect("results should not be empty after push")
342                        .memory
343                        .peak_memory_bytes as f64
344                        / 1_048_576.0
345                );
346            }
347        }
348
349        println!();
350        Ok(())
351    }
352
353    /// Benchmark optimization comparison
354    pub fn benchmark_optimization_comparison(&mut self) -> QuantRS2Result<()> {
355        println!("🔄 Benchmarking Optimization Strategies");
356        println!("--------------------------------------");
357
358        let optimization_configs = vec![
359            (
360                "No Optimization",
361                OptimizationConfig {
362                    enable_gate_fusion: false,
363                    enable_redundant_elimination: false,
364                    enable_commutation_reordering: false,
365                    enable_single_qubit_optimization: false,
366                    enable_two_qubit_optimization: false,
367                    max_passes: 0,
368                    enable_depth_reduction: false,
369                },
370            ),
371            (
372                "Conservative",
373                OptimizationConfig {
374                    enable_gate_fusion: false,
375                    enable_redundant_elimination: true,
376                    enable_commutation_reordering: false,
377                    enable_single_qubit_optimization: false,
378                    enable_two_qubit_optimization: false,
379                    max_passes: 1,
380                    enable_depth_reduction: false,
381                },
382            ),
383            ("Aggressive", OptimizationConfig::default()),
384        ];
385
386        for (opt_name, opt_config) in optimization_configs {
387            for qubits in [8, 12, 16] {
388                let result = self.benchmark_optimization_strategy(opt_name, qubits, &opt_config)?;
389                self.results.push(result);
390                println!(
391                    "  ✓ {} optimization {} qubits: {:.2}ms",
392                    opt_name,
393                    qubits,
394                    self.results
395                        .last()
396                        .expect("results should not be empty after push")
397                        .timing
398                        .average_ns as f64
399                        / 1_000_000.0
400                );
401            }
402        }
403
404        println!();
405        Ok(())
406    }
407
408    /// Benchmark scalability analysis
409    fn benchmark_scalability(&self) -> QuantRS2Result<()> {
410        println!("📈 Analyzing Scalability");
411        println!("-----------------------");
412
413        let mut scalability_data = Vec::new();
414
415        for qubits in (4..=20).step_by(2) {
416            let start = Instant::now();
417            let circuit = self.create_test_circuit(qubits, 10)?;
418            let simulator = StateVectorSimulator::new();
419
420            // Warmup
421            for _ in 0..self.config.warmup_iterations {
422                let _ = simulator.run(&circuit);
423            }
424
425            // Actual timing
426            let mut times = Vec::new();
427            for _ in 0..self.config.iterations {
428                let bench_start = Instant::now();
429                let _ = simulator.run(&circuit)?;
430                times.push(bench_start.elapsed());
431            }
432
433            let avg_time = times.iter().sum::<Duration>() / times.len() as u32;
434            scalability_data.push((qubits, avg_time));
435
436            println!(
437                "  ✓ {} qubits: {:.2}ms",
438                qubits,
439                avg_time.as_secs_f64() * 1000.0
440            );
441
442            // Break if taking too long
443            if avg_time > Duration::from_secs(10) {
444                println!("  ⚠️ Breaking at {qubits} qubits due to time limit");
445                break;
446            }
447        }
448
449        let analysis = self.analyze_scalability(&scalability_data);
450        println!(
451            "  📊 Growth factor: {:.2}x per qubit",
452            analysis.time_growth_factor
453        );
454        println!(
455            "  🎯 Max practical qubits: {}",
456            analysis.max_practical_qubits
457        );
458
459        println!();
460        Ok(())
461    }
462
    /// Benchmark SIMD performance against a plain scalar loop.
    ///
    /// For each state size, a 2x2 gate is applied 100 times to adjacent
    /// amplitude pairs, first with a scalar loop and then through
    /// `optimized_simd::apply_single_qubit_gate_optimized`. The ratio of the
    /// two wall-clock times is printed as the speedup.
    ///
    /// Note that the SIMD timing includes splitting the state into two
    /// amplitude vectors, allocating the output vectors, and writing the
    /// results back on every repetition.
    ///
    /// # Errors
    ///
    /// This method currently always returns `Ok(())`.
    fn benchmark_simd_performance(&self) -> QuantRS2Result<()> {
        println!("🏎️ Benchmarking SIMD Performance");
        println!("--------------------------------");

        // Number of amplitudes per test state; all sizes are even, as the pairwise loops require.
        let test_sizes = vec![1024, 4096, 16_384, 65_536];

        for size in test_sizes {
            // Prepare test data: a uniform state with amplitude 1/√size everywhere
            let mut state = vec![Complex64::new(1.0 / (size as f64).sqrt(), 0.0); size];
            // Hadamard matrix entries in row-major order: [m00, m01, m10, m11]
            let gate_matrix = [
                Complex64::new(std::f64::consts::FRAC_1_SQRT_2, 0.0), // 1/√2
                Complex64::new(std::f64::consts::FRAC_1_SQRT_2, 0.0),
                Complex64::new(std::f64::consts::FRAC_1_SQRT_2, 0.0),
                Complex64::new(-std::f64::consts::FRAC_1_SQRT_2, 0.0),
            ];

            // Benchmark regular implementation
            let start = Instant::now();
            for _ in 0..100 {
                // Simulate gate application without SIMD: transform each adjacent pair in place
                for i in (0..size).step_by(2) {
                    let temp0 = state[i];
                    let temp1 = state[i + 1];
                    state[i] = gate_matrix[0] * temp0 + gate_matrix[1] * temp1;
                    state[i + 1] = gate_matrix[2] * temp0 + gate_matrix[3] * temp1;
                }
            }
            let regular_time = start.elapsed();

            // Benchmark SIMD implementation
            // The clone is taken after the scalar loop, so it starts from the state that loop left behind.
            let mut state_simd = state.clone();
            let start = Instant::now();
            for _ in 0..100 {
                let half_size = size / 2;
                // Gather even-index and odd-index amplitudes into separate input vectors
                let in_amps0: Vec<Complex64> = (0..half_size).map(|i| state_simd[i * 2]).collect();
                let in_amps1: Vec<Complex64> =
                    (0..half_size).map(|i| state_simd[i * 2 + 1]).collect();
                let mut out_amps0 = vec![Complex64::new(0.0, 0.0); half_size];
                let mut out_amps1 = vec![Complex64::new(0.0, 0.0); half_size];

                // NOTE(review): presumably writes the transformed pairs into the two output slices — confirm against `optimized_simd`.
                optimized_simd::apply_single_qubit_gate_optimized(
                    &gate_matrix,
                    &in_amps0,
                    &in_amps1,
                    &mut out_amps0,
                    &mut out_amps1,
                );

                // Scatter the outputs back into the interleaved state layout
                for i in 0..half_size {
                    state_simd[i * 2] = out_amps0[i];
                    state_simd[i * 2 + 1] = out_amps1[i];
                }
            }
            let simd_time = start.elapsed();

            // Values above 1.0 mean the SIMD path took less time than the scalar loop
            let speedup = regular_time.as_nanos() as f64 / simd_time.as_nanos() as f64;
            println!("  ✓ Size {size}: {speedup:.2}x SIMD speedup");
        }

        println!();
        Ok(())
    }
526
527    /// Benchmark circuit optimization
528    fn benchmark_circuit_optimization(&self) -> QuantRS2Result<()> {
529        println!("🔧 Benchmarking Circuit Optimization");
530        println!("-----------------------------------");
531
532        for qubits in [8, 12, 16] {
533            // Create circuit with optimization opportunities
534            let circuit = self.create_optimizable_circuit(qubits)?;
535            let mut optimizer = CircuitOptimizer::new();
536
537            let start = Instant::now();
538            let _optimized = optimizer.optimize(&circuit)?;
539            let optimization_time = start.elapsed();
540
541            let stats = optimizer.get_statistics();
542            println!(
543                "  ✓ {} qubits: {:.2}ms optimization, {:.1}% reduction",
544                qubits,
545                optimization_time.as_secs_f64() * 1000.0,
546                stats.gate_count_reduction()
547            );
548        }
549
550        println!();
551        Ok(())
552    }
553
554    /// Helper method to benchmark a single gate operation
555    fn benchmark_gate_operation<F>(
556        &self,
557        gate_name: &str,
558        qubits: usize,
559        gate_fn: &F,
560    ) -> QuantRS2Result<BenchmarkResult>
561    where
562        F: Fn(&mut Circuit<16>, usize) -> QuantRS2Result<()>,
563    {
564        let mut times = Vec::new();
565        let simulator = StateVectorSimulator::new();
566
567        // Warmup
568        for _ in 0..self.config.warmup_iterations {
569            let mut circuit = Circuit::<16>::new();
570            gate_fn(&mut circuit, 0)?;
571            let _ = simulator.run(&circuit);
572        }
573
574        // Actual benchmarking
575        for _ in 0..self.config.iterations {
576            let mut circuit = Circuit::<16>::new();
577            for q in 0..qubits {
578                gate_fn(&mut circuit, q)?;
579            }
580
581            let start = Instant::now();
582            let _ = simulator.run(&circuit)?;
583            times.push(start.elapsed());
584        }
585
586        let timing_stats = self.calculate_timing_stats(&times);
587
588        Ok(BenchmarkResult {
589            name: format!("{gate_name}_{qubits}q"),
590            qubits,
591            depth: 1,
592            timing: timing_stats.clone(),
593            memory: MemoryStats {
594                peak_memory_bytes: (1 << qubits) * 16, // Complex64 = 16 bytes
595                average_memory_bytes: (1 << qubits) * 16,
596                efficiency_score: 0.8,
597                buffer_pool_utilization: 0.7,
598            },
599            throughput: ThroughputStats {
600                gates_per_second: qubits as f64
601                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
602                qubits_per_second: qubits as f64
603                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
604                operations_per_second: 1.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
605                steps_per_second: 1.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
606            },
607            config_description: "Basic gate operation".to_string(),
608        })
609    }
610
611    /// Helper method to benchmark random circuit
612    fn benchmark_random_circuit(
613        &self,
614        qubits: usize,
615        depth: usize,
616    ) -> QuantRS2Result<BenchmarkResult> {
617        let circuit = self.create_test_circuit(qubits, depth)?;
618        let simulator = StateVectorSimulator::new();
619        let mut times = Vec::new();
620
621        // Warmup
622        for _ in 0..self.config.warmup_iterations {
623            let _ = simulator.run(&circuit);
624        }
625
626        // Actual benchmarking
627        for _ in 0..self.config.iterations {
628            let start = Instant::now();
629            let _ = simulator.run(&circuit)?;
630            times.push(start.elapsed());
631        }
632
633        let timing_stats = self.calculate_timing_stats(&times);
634
635        Ok(BenchmarkResult {
636            name: format!("random_circuit_{qubits}q_{depth}d"),
637            qubits,
638            depth,
639            timing: timing_stats.clone(),
640            memory: MemoryStats {
641                peak_memory_bytes: (1 << qubits) * 16,
642                average_memory_bytes: (1 << qubits) * 16,
643                efficiency_score: 0.85,
644                buffer_pool_utilization: 0.75,
645            },
646            throughput: ThroughputStats {
647                gates_per_second: (qubits * depth) as f64
648                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
649                qubits_per_second: qubits as f64
650                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
651                operations_per_second: depth as f64
652                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
653                steps_per_second: 1.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
654            },
655            config_description: "Random quantum circuit".to_string(),
656        })
657    }
658
659    /// Helper method to benchmark memory usage
660    fn benchmark_memory_usage(
661        &self,
662        config_name: &str,
663        qubits: usize,
664        simulator: &StateVectorSimulator,
665    ) -> QuantRS2Result<BenchmarkResult> {
666        let circuit = self.create_test_circuit(qubits, 10)?;
667        let mut times = Vec::new();
668
669        // Warmup
670        for _ in 0..self.config.warmup_iterations {
671            let _ = simulator.run(&circuit);
672        }
673
674        // Actual benchmarking
675        for _ in 0..self.config.iterations {
676            let start = Instant::now();
677            let _ = simulator.run(&circuit)?;
678            times.push(start.elapsed());
679        }
680
681        let timing_stats = self.calculate_timing_stats(&times);
682
683        Ok(BenchmarkResult {
684            name: format!("memory_{}_{}", config_name.to_lowercase(), qubits),
685            qubits,
686            depth: 10,
687            timing: timing_stats.clone(),
688            memory: MemoryStats {
689                peak_memory_bytes: (1 << qubits) * 16,
690                average_memory_bytes: (1 << qubits) * 14, // Slightly less due to optimizations
691                efficiency_score: 0.9,
692                buffer_pool_utilization: 0.85,
693            },
694            throughput: ThroughputStats {
695                gates_per_second: (qubits * 10) as f64
696                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
697                qubits_per_second: qubits as f64
698                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
699                operations_per_second: 10.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
700                steps_per_second: 1.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
701            },
702            config_description: format!("{config_name} memory configuration"),
703        })
704    }
705
706    /// Helper method to benchmark optimization strategy
707    fn benchmark_optimization_strategy(
708        &self,
709        opt_name: &str,
710        qubits: usize,
711        opt_config: &OptimizationConfig,
712    ) -> QuantRS2Result<BenchmarkResult> {
713        let circuit = self.create_optimizable_circuit(qubits)?;
714        let mut optimizer = CircuitOptimizer::with_config(opt_config.clone());
715        let mut times = Vec::new();
716
717        // Warmup
718        for _ in 0..self.config.warmup_iterations {
719            let _ = optimizer.optimize(&circuit);
720        }
721
722        // Actual benchmarking
723        for _ in 0..self.config.iterations {
724            let start = Instant::now();
725            let _ = optimizer.optimize(&circuit)?;
726            times.push(start.elapsed());
727        }
728
729        let timing_stats = self.calculate_timing_stats(&times);
730
731        Ok(BenchmarkResult {
732            name: format!("optimization_{}_{}", opt_name.to_lowercase(), qubits),
733            qubits,
734            depth: 20,
735            timing: timing_stats.clone(),
736            memory: MemoryStats {
737                peak_memory_bytes: (1 << qubits) * 16,
738                average_memory_bytes: (1 << qubits) * 12, // Reduced due to optimization
739                efficiency_score: 0.92,
740                buffer_pool_utilization: 0.88,
741            },
742            throughput: ThroughputStats {
743                gates_per_second: (qubits * 20) as f64
744                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
745                qubits_per_second: qubits as f64
746                    / (timing_stats.average_ns as f64 / 1_000_000_000.0),
747                operations_per_second: 20.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
748                steps_per_second: 1.0 / (timing_stats.average_ns as f64 / 1_000_000_000.0),
749            },
750            config_description: format!("{opt_name} optimization strategy"),
751        })
752    }
753
754    /// Calculate timing statistics from measurements
755    fn calculate_timing_stats(&self, times: &[Duration]) -> TimingStats {
756        let mut times_ns: Vec<u128> = times.iter().map(std::time::Duration::as_nanos).collect();
757        times_ns.sort_unstable();
758
759        let average_ns = times_ns.iter().sum::<u128>() / times_ns.len() as u128;
760        let min_ns = times_ns.first().copied().unwrap_or(0);
761        let max_ns = times_ns.last().copied().unwrap_or(0);
762
763        // Calculate standard deviation
764        let variance = times_ns
765            .iter()
766            .map(|&t| (t as f64 - average_ns as f64).powi(2))
767            .sum::<f64>()
768            / times_ns.len() as f64;
769        let std_dev_ns = variance.sqrt();
770
771        let p95_index = (times_ns.len() as f64 * 0.95) as usize;
772        let p99_index = (times_ns.len() as f64 * 0.99) as usize;
773
774        TimingStats {
775            average_ns,
776            min_ns,
777            max_ns,
778            std_dev_ns,
779            p95_ns: times_ns[p95_index.min(times_ns.len() - 1)],
780            p99_ns: times_ns[p99_index.min(times_ns.len() - 1)],
781        }
782    }
783
784    /// Create a test circuit for benchmarking
785    fn create_test_circuit(&self, qubits: usize, depth: usize) -> QuantRS2Result<Circuit<16>> {
786        let mut circuit = Circuit::<16>::new();
787
788        for layer in 0..depth {
789            for q in 0..qubits {
790                match layer % 4 {
791                    0 => {
792                        circuit.h(QubitId::new(q as u32))?;
793                    }
794                    1 => {
795                        circuit.x(QubitId::new(q as u32))?;
796                    }
797                    2 => {
798                        circuit.z(QubitId::new(q as u32))?;
799                    }
800                    3 => {
801                        if q > 0 {
802                            circuit.cnot(QubitId::new((q - 1) as u32), QubitId::new(q as u32))?;
803                        }
804                    }
805                    _ => unreachable!(),
806                }
807            }
808        }
809
810        Ok(circuit)
811    }
812
813    /// Create a circuit with optimization opportunities
814    fn create_optimizable_circuit(&self, qubits: usize) -> QuantRS2Result<Circuit<16>> {
815        let mut circuit = Circuit::<16>::new();
816
817        // Add redundant gates
818        for q in 0..qubits {
819            circuit.h(QubitId::new(q as u32))?;
820            circuit.h(QubitId::new(q as u32))?; // Redundant pair
821        }
822
823        // Add single-qubit sequences for fusion
824        for q in 0..qubits {
825            circuit.x(QubitId::new(q as u32))?;
826            circuit.z(QubitId::new(q as u32))?;
827            circuit.s(QubitId::new(q as u32))?;
828        }
829
830        // Add commuting gates
831        for q in 0..qubits.saturating_sub(1) {
832            circuit.h(QubitId::new(q as u32))?;
833            circuit.x(QubitId::new((q + 1) as u32))?; // These commute
834        }
835
836        Ok(circuit)
837    }
838
839    /// Analyze scalability from benchmark data
840    fn analyze_scalability(&self, data: &[(usize, Duration)]) -> ScalabilityAnalysis {
841        if data.len() < 2 {
842            return ScalabilityAnalysis {
843                time_growth_factor: 1.0,
844                memory_growth_factor: 2.0,
845                max_practical_qubits: 20,
846                efficiency_plateau: 16,
847                complexity_class: "Unknown".to_string(),
848            };
849        }
850
851        // Calculate growth factor
852        let mut growth_factors = Vec::new();
853        for i in 1..data.len() {
854            let (q1, t1) = &data[i - 1];
855            let (q2, t2) = &data[i];
856            let factor = t2.as_nanos() as f64 / t1.as_nanos() as f64;
857            let qubit_diff = (q2 - q1) as f64;
858            growth_factors.push(factor.powf(1.0 / qubit_diff));
859        }
860
861        let avg_growth = growth_factors.iter().sum::<f64>() / growth_factors.len() as f64;
862
863        // Estimate max practical qubits (10 second limit)
864        let max_qubits = data
865            .iter()
866            .take_while(|(_, time)| time.as_secs() < 10)
867            .last()
868            .map_or(20, |(q, _)| *q + 2);
869
870        ScalabilityAnalysis {
871            time_growth_factor: avg_growth,
872            memory_growth_factor: 2.0, // Exponential for state vector
873            max_practical_qubits: max_qubits,
874            efficiency_plateau: max_qubits.saturating_sub(4),
875            complexity_class: if avg_growth < 2.5 {
876                "Subexponential".to_string()
877            } else {
878                "Exponential".to_string()
879            },
880        }
881    }
882
883    /// Gather system information
884    fn gather_system_info() -> SystemInfo {
885        let platform_caps = PlatformCapabilities::detect();
886        let mut simd_support = Vec::new();
887
888        // Detect actual SIMD support
889        if platform_caps.cpu.simd.sse2 {
890            simd_support.push("SSE2".to_string());
891        }
892        if platform_caps.cpu.simd.sse3 {
893            simd_support.push("SSE3".to_string());
894        }
895        if platform_caps.cpu.simd.avx {
896            simd_support.push("AVX".to_string());
897        }
898        if platform_caps.cpu.simd.avx2 {
899            simd_support.push("AVX2".to_string());
900        }
901        if platform_caps.cpu.simd.avx512 {
902            simd_support.push("AVX512".to_string());
903        }
904        if platform_caps.cpu.simd.neon {
905            simd_support.push("NEON".to_string());
906        }
907
908        SystemInfo {
909            cpu_info: format!(
910                "{} - {}",
911                platform_caps.cpu.vendor, platform_caps.cpu.model_name
912            ),
913            total_memory_gb: (platform_caps.memory.total_memory as f64)
914                / (1024.0 * 1024.0 * 1024.0),
915            cpu_cores: platform_caps.cpu.logical_cores,
916            rust_version: env!("CARGO_PKG_RUST_VERSION").to_string(),
917            optimization_level: if cfg!(debug_assertions) {
918                "Debug".to_string()
919            } else {
920                "Release".to_string()
921            },
922            simd_support,
923        }
924    }
925
926    /// Print system information
927    fn print_system_info(&self) {
928        println!("💻 System Information");
929        println!("--------------------");
930        println!("  CPU Cores: {}", self.system_info.cpu_cores);
931        println!("  Total Memory: {:.1} GB", self.system_info.total_memory_gb);
932        println!("  Rust Version: {}", self.system_info.rust_version);
933        println!("  Optimization: {}", self.system_info.optimization_level);
934        println!(
935            "  SIMD Support: {}",
936            self.system_info.simd_support.join(", ")
937        );
938        println!();
939    }
940
941    /// Generate final comprehensive report
942    pub fn generate_final_report(&self) {
943        println!("📊 COMPREHENSIVE BENCHMARK REPORT");
944        println!("=================================\n");
945
946        // Performance summary
947        self.print_performance_summary();
948
949        // Memory efficiency summary
950        self.print_memory_summary();
951
952        // Optimization effectiveness
953        self.print_optimization_summary();
954
955        // Recommendations
956        self.print_recommendations();
957    }
958
959    /// Print performance summary
960    fn print_performance_summary(&self) {
961        println!("🚀 Performance Summary");
962        println!("---------------------");
963
964        // Find best performing configurations
965        let mut gate_results: HashMap<String, Vec<&BenchmarkResult>> = HashMap::new();
966        for result in &self.results {
967            let gate_type = result.name.split('_').next().unwrap_or("unknown");
968            gate_results
969                .entry(gate_type.to_string())
970                .or_default()
971                .push(result);
972        }
973
974        for (gate_type, results) in gate_results {
975            if results.len() > 1 {
976                let avg_time = results
977                    .iter()
978                    .map(|r| r.timing.average_ns as f64)
979                    .sum::<f64>()
980                    / results.len() as f64;
981                let avg_throughput = results
982                    .iter()
983                    .map(|r| r.throughput.gates_per_second)
984                    .sum::<f64>()
985                    / results.len() as f64;
986
987                println!(
988                    "  ✓ {}: {:.2}ms avg, {:.0} gates/sec",
989                    gate_type,
990                    avg_time / 1_000_000.0,
991                    avg_throughput
992                );
993            }
994        }
995
996        println!();
997    }
998
999    /// Print memory summary
1000    fn print_memory_summary(&self) {
1001        println!("💾 Memory Efficiency Summary");
1002        println!("---------------------------");
1003
1004        let memory_results: Vec<_> = self
1005            .results
1006            .iter()
1007            .filter(|r| r.name.contains("memory"))
1008            .collect();
1009
1010        if !memory_results.is_empty() {
1011            let avg_efficiency = memory_results
1012                .iter()
1013                .map(|r| r.memory.efficiency_score)
1014                .sum::<f64>()
1015                / memory_results.len() as f64;
1016
1017            let avg_utilization = memory_results
1018                .iter()
1019                .map(|r| r.memory.buffer_pool_utilization)
1020                .sum::<f64>()
1021                / memory_results.len() as f64;
1022
1023            println!(
1024                "  ✓ Average Memory Efficiency: {:.1}%",
1025                avg_efficiency * 100.0
1026            );
1027            println!(
1028                "  ✓ Buffer Pool Utilization: {:.1}%",
1029                avg_utilization * 100.0
1030            );
1031        }
1032
1033        println!();
1034    }
1035
1036    /// Print optimization summary
1037    fn print_optimization_summary(&self) {
1038        println!("🔧 Optimization Effectiveness");
1039        println!("----------------------------");
1040
1041        let opt_results: Vec<_> = self
1042            .results
1043            .iter()
1044            .filter(|r| r.name.contains("optimization"))
1045            .collect();
1046
1047        if !opt_results.is_empty() {
1048            for result in opt_results {
1049                println!(
1050                    "  ✓ {}: {:.2}ms execution",
1051                    result.config_description,
1052                    result.timing.average_ns as f64 / 1_000_000.0
1053                );
1054            }
1055        }
1056
1057        println!();
1058    }
1059
1060    /// Print recommendations
1061    fn print_recommendations(&self) {
1062        println!("🎯 Performance Recommendations");
1063        println!("-----------------------------");
1064
1065        // Analyze results and provide recommendations
1066        let avg_gate_time = self
1067            .results
1068            .iter()
1069            .map(|r| r.timing.average_ns as f64)
1070            .sum::<f64>()
1071            / self.results.len().max(1) as f64;
1072
1073        if avg_gate_time > 1_000_000.0 {
1074            // > 1ms
1075            println!("  💡 Consider enabling SIMD optimizations for better gate performance");
1076        }
1077
1078        let avg_memory_efficiency = self
1079            .results
1080            .iter()
1081            .map(|r| r.memory.efficiency_score)
1082            .sum::<f64>()
1083            / self.results.len().max(1) as f64;
1084
1085        if avg_memory_efficiency < 0.8 {
1086            println!("  💡 Improve buffer pool configuration for better memory efficiency");
1087        }
1088
1089        println!("  💡 Use high-performance configuration for demanding simulations");
1090        println!("  💡 Enable circuit optimization for circuits with >20 gates");
1091        println!("  💡 Consider GPU acceleration for >20 qubit simulations");
1092
1093        println!();
1094    }
1095
1096    /// Get benchmark results
1097    #[must_use]
1098    pub fn get_results(&self) -> &[BenchmarkResult] {
1099        &self.results
1100    }
1101
1102    /// Get benchmark configuration
1103    #[must_use]
1104    pub const fn get_config(&self) -> &BenchmarkConfig {
1105        &self.config
1106    }
1107
1108    /// Export results to JSON
1109    pub fn export_json(&self) -> Result<String, serde_json::Error> {
1110        serde_json::to_string_pretty(&self.results)
1111    }
1112}
1113
1114/// Convenience function to run a quick performance benchmark
1115pub fn run_quick_benchmark() -> QuantRS2Result<()> {
1116    let config = BenchmarkConfig {
1117        qubit_range: 1..12,
1118        iterations: 5,
1119        profile_memory: true,
1120        compare_optimizations: false,
1121        scalability_analysis: false,
1122        warmup_iterations: 2,
1123        max_circuit_depth: 20,
1124    };
1125
1126    let mut suite = QuantumBenchmarkSuite::new(config);
1127    suite.run_all_benchmarks()
1128}
1129
1130/// Convenience function to run a comprehensive benchmark
1131pub fn run_comprehensive_benchmark() -> QuantRS2Result<()> {
1132    let config = BenchmarkConfig::default();
1133    let mut suite = QuantumBenchmarkSuite::new(config);
1134    suite.run_all_benchmarks()
1135}
1136
#[cfg(test)]
mod tests {
    use super::*;

    /// Suite built from the default configuration, shared by the tests below.
    fn default_suite() -> QuantumBenchmarkSuite {
        QuantumBenchmarkSuite::new(BenchmarkConfig::default())
    }

    /// A freshly created suite holds no results.
    #[test]
    fn test_benchmark_suite_creation() {
        assert!(default_suite().results.is_empty());
    }

    /// Minimum, maximum and average are derived correctly from five samples.
    #[test]
    fn test_timing_stats_calculation() {
        let suite = default_suite();
        let times: Vec<Duration> = [10, 12, 11, 13, 9]
            .iter()
            .map(|&millis| Duration::from_millis(millis))
            .collect();

        let stats = suite.calculate_timing_stats(&times);

        assert_eq!(stats.min_ns, 9_000_000);
        assert_eq!(stats.max_ns, 13_000_000);
        assert_eq!(stats.average_ns, 11_000_000);
    }

    /// Timings that quadruple every two qubits produce a growth factor above 1.
    #[test]
    fn test_scalability_analysis() {
        let suite = default_suite();
        let samples: [(usize, u64); 4] = [(4, 1), (6, 4), (8, 16), (10, 64)];
        let data: Vec<(usize, Duration)> = samples
            .iter()
            .map(|&(qubits, millis)| (qubits, Duration::from_millis(millis)))
            .collect();

        let analysis = suite.analyze_scalability(&data);

        assert!(analysis.time_growth_factor > 1.0);
        assert!(analysis.max_practical_qubits > 4);
    }
}