use quantrs2_core::error::{QuantRS2Error, QuantRS2Result};
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::Complex64;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};
/// Dispatches named quantum-gate kernels against a state vector and records
/// per-gate execution statistics.
///
/// NOTE(review): the kernels visible in this file are plain Rust loops over the
/// state's contiguous slice. `config`, `kernel_cache` and `memory_optimizer`
/// are initialised by `new` but are not read by any method shown here.
#[derive(Debug)]
pub struct GPUKernelOptimizer {
/// Kernel descriptors, looked up by gate name when a gate is applied.
kernel_registry: KernelRegistry,
/// Execution counters and timings, updated after each applied gate.
stats: Arc<Mutex<KernelStats>>,
/// Caller-supplied tuning options.
config: GPUKernelConfig,
/// Compiled kernels keyed by a string; created empty by `new`.
kernel_cache: Arc<RwLock<HashMap<String, CompiledKernel>>>,
/// Memory layout settings; initialised with `MemoryLayoutOptimizer::default()`.
memory_optimizer: MemoryLayoutOptimizer,
}
/// Tuning options handed to `GPUKernelOptimizer::new`.
///
/// NOTE(review): no code visible in this file reads these fields, so the
/// per-field notes only record the value set by the `Default` impl. The
/// intended meaning is implied by the field names and should be confirmed
/// against whatever consumes this configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GPUKernelConfig {
/// Defaults to `true`.
pub enable_warp_optimization: bool,
/// Defaults to `true`.
pub enable_shared_memory: bool,
/// Defaults to `256`.
pub block_size: usize,
/// Defaults to `GridSizeMethod::Automatic`.
pub grid_size_method: GridSizeMethod,
/// Defaults to `true`.
pub enable_kernel_fusion: bool,
/// Defaults to `8`.
pub max_fusion_length: usize,
/// Defaults to `true`.
pub enable_memory_coalescing: bool,
/// Defaults to `true`.
pub enable_streaming: bool,
/// Defaults to `4`.
pub num_streams: usize,
/// Defaults to `0.75`.
pub target_occupancy: f64,
}
impl Default for GPUKernelConfig {
    /// Returns the stock configuration: every feature toggle enabled, a block
    /// size of 256, automatic grid sizing, fusion of up to 8 kernels, 4 streams
    /// and a target occupancy of 0.75.
    fn default() -> Self {
        Self {
            // Feature toggles — all switched on.
            enable_warp_optimization: true,
            enable_shared_memory: true,
            enable_kernel_fusion: true,
            enable_memory_coalescing: true,
            enable_streaming: true,
            // Launch geometry.
            block_size: 256,
            grid_size_method: GridSizeMethod::Automatic,
            target_occupancy: 0.75,
            // Limits.
            max_fusion_length: 8,
            num_streams: 4,
        }
    }
}
/// Selects how the grid size is chosen.
///
/// NOTE(review): only `Automatic` is referenced in this file (as the default
/// of `GPUKernelConfig::grid_size_method`); nothing visible here acts on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum GridSizeMethod {
/// The default selection.
Automatic,
/// Carries a caller-supplied value — presumably the grid size; confirm.
Fixed(usize),
/// Presumably derived from `GPUKernelConfig::target_occupancy` — confirm.
OccupancyBased,
}
/// Lookup tables of kernel descriptors, each keyed by kernel name.
#[derive(Debug)]
pub struct KernelRegistry {
/// Single-qubit kernel descriptors (e.g. `"hadamard"`, `"pauli_x"`).
single_qubit_kernels: HashMap<String, SingleQubitKernel>,
/// Two-qubit kernel descriptors (e.g. `"cnot"`, `"swap"`).
two_qubit_kernels: HashMap<String, TwoQubitKernel>,
/// Descriptors of fused kernel sequences (e.g. `"bell_state"`).
fused_kernels: HashMap<String, FusedKernel>,
/// User-defined kernels; never populated by code visible in this file.
custom_kernels: HashMap<String, CustomKernel>,
}
impl Default for KernelRegistry {
    /// Builds a registry whose tables start empty and are then filled with the
    /// built-in kernels.
    fn default() -> Self {
        let mut built = Self {
            single_qubit_kernels: HashMap::default(),
            two_qubit_kernels: HashMap::default(),
            fused_kernels: HashMap::default(),
            custom_kernels: HashMap::default(),
        };
        built.register_builtin_kernels();
        built
    }
}
impl KernelRegistry {
fn register_builtin_kernels(&mut self) {
self.single_qubit_kernels.insert(
"hadamard".to_string(),
SingleQubitKernel {
name: "hadamard".to_string(),
kernel_type: SingleQubitKernelType::Hadamard,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: true,
register_usage: 32,
},
);
self.single_qubit_kernels.insert(
"pauli_x".to_string(),
SingleQubitKernel {
name: "pauli_x".to_string(),
kernel_type: SingleQubitKernelType::PauliX,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: false, register_usage: 16,
},
);
self.single_qubit_kernels.insert(
"pauli_y".to_string(),
SingleQubitKernel {
name: "pauli_y".to_string(),
kernel_type: SingleQubitKernelType::PauliY,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: false,
register_usage: 24,
},
);
self.single_qubit_kernels.insert(
"pauli_z".to_string(),
SingleQubitKernel {
name: "pauli_z".to_string(),
kernel_type: SingleQubitKernelType::PauliZ,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: false,
register_usage: 16,
},
);
self.single_qubit_kernels.insert(
"phase".to_string(),
SingleQubitKernel {
name: "phase".to_string(),
kernel_type: SingleQubitKernelType::Phase,
optimization_level: OptimizationLevel::High,
uses_shared_memory: false,
register_usage: 24,
},
);
self.single_qubit_kernels.insert(
"t_gate".to_string(),
SingleQubitKernel {
name: "t_gate".to_string(),
kernel_type: SingleQubitKernelType::TGate,
optimization_level: OptimizationLevel::High,
uses_shared_memory: false,
register_usage: 24,
},
);
self.single_qubit_kernels.insert(
"rotation_x".to_string(),
SingleQubitKernel {
name: "rotation_x".to_string(),
kernel_type: SingleQubitKernelType::RotationX,
optimization_level: OptimizationLevel::Medium,
uses_shared_memory: true,
register_usage: 40,
},
);
self.single_qubit_kernels.insert(
"rotation_y".to_string(),
SingleQubitKernel {
name: "rotation_y".to_string(),
kernel_type: SingleQubitKernelType::RotationY,
optimization_level: OptimizationLevel::Medium,
uses_shared_memory: true,
register_usage: 40,
},
);
self.single_qubit_kernels.insert(
"rotation_z".to_string(),
SingleQubitKernel {
name: "rotation_z".to_string(),
kernel_type: SingleQubitKernelType::RotationZ,
optimization_level: OptimizationLevel::Medium,
uses_shared_memory: true,
register_usage: 32,
},
);
self.two_qubit_kernels.insert(
"cnot".to_string(),
TwoQubitKernel {
name: "cnot".to_string(),
kernel_type: TwoQubitKernelType::CNOT,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: true,
register_usage: 48,
memory_access_pattern: MemoryAccessPattern::Strided,
},
);
self.two_qubit_kernels.insert(
"cz".to_string(),
TwoQubitKernel {
name: "cz".to_string(),
kernel_type: TwoQubitKernelType::CZ,
optimization_level: OptimizationLevel::Maximum,
uses_shared_memory: false,
register_usage: 32,
memory_access_pattern: MemoryAccessPattern::Sparse,
},
);
self.two_qubit_kernels.insert(
"swap".to_string(),
TwoQubitKernel {
name: "swap".to_string(),
kernel_type: TwoQubitKernelType::SWAP,
optimization_level: OptimizationLevel::High,
uses_shared_memory: true,
register_usage: 40,
memory_access_pattern: MemoryAccessPattern::Strided,
},
);
self.two_qubit_kernels.insert(
"iswap".to_string(),
TwoQubitKernel {
name: "iswap".to_string(),
kernel_type: TwoQubitKernelType::ISWAP,
optimization_level: OptimizationLevel::High,
uses_shared_memory: true,
register_usage: 48,
memory_access_pattern: MemoryAccessPattern::Strided,
},
);
self.two_qubit_kernels.insert(
"controlled_rotation".to_string(),
TwoQubitKernel {
name: "controlled_rotation".to_string(),
kernel_type: TwoQubitKernelType::ControlledRotation,
optimization_level: OptimizationLevel::Medium,
uses_shared_memory: true,
register_usage: 56,
memory_access_pattern: MemoryAccessPattern::Strided,
},
);
self.fused_kernels.insert(
"h_cnot_h".to_string(),
FusedKernel {
name: "h_cnot_h".to_string(),
sequence: vec![
"hadamard".to_string(),
"cnot".to_string(),
"hadamard".to_string(),
],
optimization_gain: 2.5,
register_usage: 64,
},
);
self.fused_kernels.insert(
"rotation_chain".to_string(),
FusedKernel {
name: "rotation_chain".to_string(),
sequence: vec![
"rotation_x".to_string(),
"rotation_y".to_string(),
"rotation_z".to_string(),
],
optimization_gain: 2.0,
register_usage: 56,
},
);
self.fused_kernels.insert(
"bell_state".to_string(),
FusedKernel {
name: "bell_state".to_string(),
sequence: vec!["hadamard".to_string(), "cnot".to_string()],
optimization_gain: 1.8,
register_usage: 48,
},
);
}
}
/// Descriptor of a single-qubit kernel held in `KernelRegistry`.
#[derive(Debug, Clone)]
pub struct SingleQubitKernel {
/// Kernel name; for the built-in kernels it equals the registry key.
pub name: String,
/// Selects which implementation runs when the gate is applied.
pub kernel_type: SingleQubitKernelType,
/// Descriptive metadata; not read by code visible in this file.
pub optimization_level: OptimizationLevel,
/// Descriptive metadata; not read by code visible in this file.
pub uses_shared_memory: bool,
/// Descriptive metadata; not read by code visible in this file.
pub register_usage: usize,
}
/// Identifies the implementation used for a single-qubit gate.
///
/// Every variant except `Generic` has a dedicated `apply_*_optimized` routine
/// on `GPUKernelOptimizer`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SingleQubitKernelType {
Hadamard,
PauliX,
PauliY,
PauliZ,
/// Multiplies the amplitude of the set-bit half of each pair by `i`.
Phase,
/// Multiplies the amplitude of the set-bit half of each pair by `(1 + i) / sqrt(2)`.
TGate,
/// Rotation taking one angle parameter.
RotationX,
/// Rotation taking one angle parameter.
RotationY,
/// Rotation taking one angle parameter.
RotationZ,
/// Routed to the generic single-qubit fallback.
Generic,
}
/// Descriptor of a two-qubit kernel held in `KernelRegistry`.
#[derive(Debug, Clone)]
pub struct TwoQubitKernel {
/// Kernel name; for the built-in kernels it equals the registry key.
pub name: String,
/// Selects which implementation runs when the gate is applied.
pub kernel_type: TwoQubitKernelType,
/// Descriptive metadata; not read by code visible in this file.
pub optimization_level: OptimizationLevel,
/// Descriptive metadata; not read by code visible in this file.
pub uses_shared_memory: bool,
/// Descriptive metadata; not read by code visible in this file.
pub register_usage: usize,
/// Descriptive metadata; not read by code visible in this file.
pub memory_access_pattern: MemoryAccessPattern,
}
/// Identifies the implementation used for a two-qubit gate.
///
/// `CNOT`, `CZ`, `SWAP` and `ISWAP` have dedicated routines on
/// `GPUKernelOptimizer`; `ControlledRotation` and `Generic` are routed to the
/// generic two-qubit fallback.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TwoQubitKernelType {
CNOT,
CZ,
SWAP,
ISWAP,
ControlledRotation,
Generic,
}
/// Memory access pattern label attached to a `TwoQubitKernel`.
///
/// Purely descriptive in this file: the built-in kernels are tagged `Strided`
/// or `Sparse`, and no visible code branches on the value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryAccessPattern {
Coalesced,
Strided,
Sparse,
Random,
}
/// Descriptor of a fused sequence of kernels held in `KernelRegistry`.
#[derive(Debug, Clone)]
pub struct FusedKernel {
/// Fused kernel name; for the built-in entries it equals the registry key.
pub name: String,
/// Names of the constituent kernels, in order (e.g. `hadamard`, `cnot`).
pub sequence: Vec<String>,
/// Presumably the expected speed-up factor from fusing — confirm; not read
/// by code visible in this file.
pub optimization_gain: f64,
/// Descriptive metadata; not read by code visible in this file.
pub register_usage: usize,
}
/// Descriptor of a user-supplied kernel.
///
/// NOTE(review): nothing in this file constructs or consumes this type.
#[derive(Debug, Clone)]
pub struct CustomKernel {
/// Kernel name.
pub name: String,
/// Kernel code as text — presumably source; confirm the expected format.
pub code: String,
/// Descriptive metadata; not read by code visible in this file.
pub register_usage: usize,
}
/// Entry type of the optimizer's kernel cache.
///
/// NOTE(review): nothing in this file constructs a value of this type.
#[derive(Debug, Clone)]
pub struct CompiledKernel {
/// Kernel name.
pub name: String,
/// Compiled kernel bytes; the format is not specified in this file.
pub compiled_code: Vec<u8>,
/// Execution parameters associated with the kernel.
pub exec_params: KernelExecParams,
}
/// Execution parameters stored alongside a `CompiledKernel`.
///
/// NOTE(review): not read by code visible in this file; the units of the
/// fields below are assumptions to confirm.
#[derive(Debug, Clone)]
pub struct KernelExecParams {
/// Three-component block dimensions — presumably (x, y, z).
pub block_dim: (usize, usize, usize),
/// Three-component grid dimensions — presumably (x, y, z).
pub grid_dim: (usize, usize, usize),
/// Shared memory size — presumably in bytes.
pub shared_memory_size: usize,
/// Upper bound on threads per block.
pub max_threads_per_block: usize,
}
/// Optimization level label attached to kernel descriptors.
///
/// Purely descriptive in this file: the built-in kernels are tagged `Medium`,
/// `High` or `Maximum`, and no visible code branches on the value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationLevel {
Basic,
Medium,
High,
Maximum,
}
/// Execution statistics accumulated by `GPUKernelOptimizer`.
///
/// `Default` yields all-zero counters and empty maps.
#[derive(Debug, Clone, Default)]
pub struct KernelStats {
/// Number of gate applications recorded so far.
pub total_executions: u64,
/// Sum of the measured wall-clock time of recorded gate applications.
pub total_execution_time: Duration,
/// Number of recorded applications per gate name.
pub execution_counts: HashMap<String, u64>,
/// Accumulated wall-clock time per gate name.
pub execution_times: HashMap<String, Duration>,
/// Not updated by code visible in this file.
pub cache_hits: u64,
/// Not updated by code visible in this file.
pub cache_misses: u64,
/// Not updated by code visible in this file.
pub fused_operations: u64,
/// Not updated by code visible in this file; unit unspecified.
pub memory_bandwidth: f64,
/// Not updated by code visible in this file; unit unspecified.
pub compute_throughput: f64,
}
/// Memory layout settings stored on `GPUKernelOptimizer`.
///
/// NOTE(review): neither field is read by code visible in this file.
#[derive(Debug)]
pub struct MemoryLayoutOptimizer {
/// Selected layout strategy; defaults to `MemoryLayoutStrategy::Interleaved`.
strategy: MemoryLayoutStrategy,
/// Prefetch distance; defaults to `4` (unit unspecified).
prefetch_distance: usize,
}
/// Layout strategy label used by `MemoryLayoutOptimizer`.
///
/// Derives `PartialEq`/`Eq` like the other plain enums in this file so that
/// strategies can be compared directly.
///
/// NOTE(review): no code visible in this file branches on the variant; the
/// concrete layouts the names refer to should be confirmed with the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryLayoutStrategy {
    /// The default strategy of `MemoryLayoutOptimizer`.
    Interleaved,
    SplitArrays,
    StructureOfArrays,
    ArrayOfStructures,
}
impl Default for MemoryLayoutOptimizer {
    /// Returns the interleaved strategy with a prefetch distance of 4.
    fn default() -> Self {
        let strategy = MemoryLayoutStrategy::Interleaved;
        let prefetch_distance = 4;
        Self {
            strategy,
            prefetch_distance,
        }
    }
}
impl GPUKernelOptimizer {
/// Creates an optimizer that uses `config`, a registry holding the built-in
/// kernels, zeroed statistics, an empty kernel cache and the default memory
/// layout settings.
#[must_use]
pub fn new(config: GPUKernelConfig) -> Self {
    let kernel_registry = KernelRegistry::default();
    let stats = Arc::new(Mutex::new(KernelStats::default()));
    let kernel_cache = Arc::new(RwLock::new(HashMap::new()));
    let memory_optimizer = MemoryLayoutOptimizer::default();
    Self {
        kernel_registry,
        stats,
        config,
        kernel_cache,
        memory_optimizer,
    }
}
/// Applies the single-qubit gate registered under `gate_name` to `qubit` of
/// `state`, in place, and records the call in the execution statistics.
///
/// The rotation kernels take their angle from the first element of
/// `parameters` (passed to `sin`/`cos`, i.e. radians); when it is absent the
/// angle is `0.0`. Other kernels ignore `parameters`.
///
/// Gate names that are not registered, or that are registered as
/// `SingleQubitKernelType::Generic`, are routed to the generic fallback
/// `apply_generic_single_qubit` and are still counted in the statistics.
///
/// # Errors
///
/// Returns `QuantRS2Error::InvalidInput` when
/// - a specialised kernel is selected and `qubit` is out of range for `state`
///   (the state length must be a multiple of `2 << qubit`);
/// - `state` cannot be viewed as a contiguous mutable slice;
/// - the statistics lock cannot be acquired.
pub fn apply_single_qubit_gate(
    &mut self,
    state: &mut Array1<Complex64>,
    qubit: usize,
    gate_name: &str,
    parameters: Option<&[f64]>,
) -> QuantRS2Result<()> {
    let start = Instant::now();
    let kernel_type = self
        .kernel_registry
        .single_qubit_kernels
        .get(gate_name)
        .map_or(SingleQubitKernelType::Generic, |kernel| kernel.kernel_type);

    // The specialised kernels index `state` at `i0` and `i0 + stride`; that is
    // only in bounds when the length is a whole number of `2 * stride` blocks.
    // Validated lazily so the generic fallback keeps accepting any `qubit`.
    let len = state.len();
    let checked_stride = move || -> QuantRS2Result<usize> {
        u32::try_from(qubit)
            .ok()
            .and_then(|shift| 1_usize.checked_shl(shift))
            .filter(|stride| {
                stride
                    .checked_mul(2)
                    .is_some_and(|pair_block| len % pair_block == 0)
            })
            .ok_or_else(|| {
                QuantRS2Error::InvalidInput(format!(
                    "qubit index {qubit} is out of range for a state vector of length {len}"
                ))
            })
    };
    let angle = parameters.and_then(|p| p.first()).copied().unwrap_or(0.0);

    match kernel_type {
        SingleQubitKernelType::Hadamard => {
            self.apply_hadamard_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::PauliX => {
            self.apply_pauli_x_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::PauliY => {
            self.apply_pauli_y_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::PauliZ => {
            self.apply_pauli_z_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::Phase => {
            self.apply_phase_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::TGate => {
            self.apply_t_gate_optimized(state, checked_stride()?)?;
        }
        SingleQubitKernelType::RotationX => {
            self.apply_rotation_x_optimized(state, checked_stride()?, angle)?;
        }
        SingleQubitKernelType::RotationY => {
            self.apply_rotation_y_optimized(state, checked_stride()?, angle)?;
        }
        SingleQubitKernelType::RotationZ => {
            self.apply_rotation_z_optimized(state, checked_stride()?, angle)?;
        }
        SingleQubitKernelType::Generic => {
            self.apply_generic_single_qubit(state, qubit, gate_name)?;
        }
    }

    // Measure once so the total and the per-gate time agree.
    let elapsed = start.elapsed();
    let mut stats = self
        .stats
        .lock()
        .map_err(|_| QuantRS2Error::InvalidInput("Failed to acquire stats lock".to_string()))?;
    stats.total_executions += 1;
    stats.total_execution_time += elapsed;
    *stats
        .execution_counts
        .entry(gate_name.to_string())
        .or_insert(0) += 1;
    *stats
        .execution_times
        .entry(gate_name.to_string())
        .or_insert(Duration::ZERO) += elapsed;
    Ok(())
}
/// Applies the Hadamard transform to every amplitude pair `stride` apart.
///
/// Each pair `(a0, a1)` becomes `((a0 + a1) / sqrt(2), (a0 - a1) / sqrt(2))`.
/// Indexing panics if a paired index falls outside the slice, so `stride`
/// must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_hadamard_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let scale = 1.0 / 2.0_f64.sqrt();
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for pair in 0..pair_count {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        let hi = lo + stride;
        let (x, y) = (amps[lo], amps[hi]);
        amps[lo] = Complex64::new((x.re + y.re) * scale, (x.im + y.im) * scale);
        amps[hi] = Complex64::new((x.re - y.re) * scale, (x.im - y.im) * scale);
    }
    Ok(())
}
/// Applies Pauli-X by swapping the two amplitudes of every pair `stride` apart.
///
/// Panics if a paired index falls outside the slice, so `stride` must pair up
/// the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_pauli_x_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    (0..pair_count).for_each(|pair| {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        amps.swap(lo, lo + stride);
    });
    Ok(())
}
/// Applies Pauli-Y to every amplitude pair `stride` apart.
///
/// Each pair `(a0, a1)` becomes `(-i * a1, i * a0)`. Indexing panics if a
/// paired index falls outside the slice, so `stride` must pair up the whole
/// state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_pauli_y_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let amps = match state.as_slice_mut() {
        Some(slice) => slice,
        None => {
            return Err(QuantRS2Error::InvalidInput(
                "Failed to get mutable slice".to_string(),
            ))
        }
    };
    for pair in 0..pair_count {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        let hi = lo + stride;
        let (x, y) = (amps[lo], amps[hi]);
        // -i * y
        amps[lo] = Complex64::new(y.im, -y.re);
        // i * x
        amps[hi] = Complex64::new(-x.im, x.re);
    }
    Ok(())
}
/// Applies Pauli-Z by negating the upper amplitude of every pair `stride` apart.
///
/// Indexing panics if the upper index falls outside the slice, so `stride`
/// must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_pauli_z_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for pair in 0..pair_count {
        let hi = (pair / stride) * (2 * stride) + (pair % stride) + stride;
        let negated = -amps[hi];
        amps[hi] = negated;
    }
    Ok(())
}
/// Applies the phase gate: multiplies the upper amplitude of every pair
/// `stride` apart by `i`.
///
/// Indexing panics if the upper index falls outside the slice, so `stride`
/// must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_phase_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let amps = match state.as_slice_mut() {
        Some(slice) => slice,
        None => {
            return Err(QuantRS2Error::InvalidInput(
                "Failed to get mutable slice".to_string(),
            ))
        }
    };
    (0..pair_count).for_each(|pair| {
        let hi = (pair / stride) * (2 * stride) + (pair % stride) + stride;
        let current = amps[hi];
        // i * (re + i*im) = -im + i*re
        amps[hi] = Complex64::new(-current.im, current.re);
    });
    Ok(())
}
/// Applies the T gate: multiplies the upper amplitude of every pair `stride`
/// apart by `(1 + i) / sqrt(2)`.
///
/// Indexing panics if the upper index falls outside the slice, so `stride`
/// must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_t_gate_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
) -> QuantRS2Result<()> {
    use std::f64::consts::FRAC_1_SQRT_2;

    let pair_count = state.len() / 2;
    let phase_factor = Complex64::new(FRAC_1_SQRT_2, FRAC_1_SQRT_2);
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for pair in 0..pair_count {
        let hi = (pair / stride) * (2 * stride) + (pair % stride) + stride;
        amps[hi] *= phase_factor;
    }
    Ok(())
}
/// Applies an X rotation by `angle` to every amplitude pair `stride` apart.
///
/// With `c = cos(angle / 2)` and `s = sin(angle / 2)`, each pair `(a0, a1)`
/// becomes `(c * a0 - i * s * a1, -i * s * a0 + c * a1)`. Indexing panics if
/// a paired index falls outside the slice, so `stride` must pair up the whole
/// state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_rotation_x_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
    angle: f64,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let half_angle = angle / 2.0;
    let c = half_angle.cos();
    let s = half_angle.sin();
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for pair in 0..pair_count {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        let hi = lo + stride;
        let (x, y) = (amps[lo], amps[hi]);
        amps[lo] = Complex64::new(c * x.re + s * y.im, c * x.im - s * y.re);
        amps[hi] = Complex64::new(s * x.im + c * y.re, (-s).mul_add(x.re, c * y.im));
    }
    Ok(())
}
/// Applies a Y rotation by `angle` to every amplitude pair `stride` apart.
///
/// With `c = cos(angle / 2)` and `s = sin(angle / 2)`, each pair `(a0, a1)`
/// becomes `(c * a0 - s * a1, s * a0 + c * a1)`. Indexing panics if a paired
/// index falls outside the slice, so `stride` must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_rotation_y_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
    angle: f64,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let half_angle = angle / 2.0;
    let c = half_angle.cos();
    let s = half_angle.sin();
    let amps = match state.as_slice_mut() {
        Some(slice) => slice,
        None => {
            return Err(QuantRS2Error::InvalidInput(
                "Failed to get mutable slice".to_string(),
            ))
        }
    };
    (0..pair_count).for_each(|pair| {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        let hi = lo + stride;
        let (x, y) = (amps[lo], amps[hi]);
        amps[lo] = Complex64::new(c * x.re - s * y.re, c * x.im - s * y.im);
        amps[hi] = Complex64::new(s * x.re + c * y.re, s * x.im + c * y.im);
    });
    Ok(())
}
/// Applies a Z rotation by `angle` to every amplitude pair `stride` apart.
///
/// The lower amplitude of each pair is multiplied by `exp(-i * angle / 2)`
/// and the upper one by `exp(i * angle / 2)`. Indexing panics if a paired
/// index falls outside the slice, so `stride` must pair up the whole state.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_rotation_z_optimized(
    &self,
    state: &mut Array1<Complex64>,
    stride: usize,
    angle: f64,
) -> QuantRS2Result<()> {
    let pair_count = state.len() / 2;
    let half_angle = angle / 2.0;
    let lower_phase = Complex64::new(half_angle.cos(), -half_angle.sin());
    let upper_phase = Complex64::new(half_angle.cos(), half_angle.sin());
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for pair in 0..pair_count {
        let lo = (pair / stride) * (2 * stride) + (pair % stride);
        let hi = lo + stride;
        amps[lo] *= lower_phase;
        amps[hi] *= upper_phase;
    }
    Ok(())
}
/// Fallback for single-qubit gates without a specialised kernel.
///
/// Currently a stub: every argument is ignored, `state` is left untouched and
/// `Ok(())` is returned unconditionally. The parameters are underscore-prefixed
/// so the unused bindings do not trigger `unused_variables`.
const fn apply_generic_single_qubit(
    &self,
    _state: &Array1<Complex64>,
    _qubit: usize,
    _gate_name: &str,
) -> QuantRS2Result<()> {
    Ok(())
}
pub fn apply_two_qubit_gate(
&mut self,
state: &mut Array1<Complex64>,
control: usize,
target: usize,
gate_name: &str,
) -> QuantRS2Result<()> {
let start = Instant::now();
let kernel = self.kernel_registry.two_qubit_kernels.get(gate_name);
match kernel {
Some(k) => match k.kernel_type {
TwoQubitKernelType::CNOT => {
self.apply_cnot_optimized(state, control, target)?;
}
TwoQubitKernelType::CZ => {
self.apply_cz_optimized(state, control, target)?;
}
TwoQubitKernelType::SWAP => {
self.apply_swap_optimized(state, control, target)?;
}
TwoQubitKernelType::ISWAP => {
self.apply_iswap_optimized(state, control, target)?;
}
_ => {
self.apply_generic_two_qubit(state, control, target, gate_name)?;
}
},
None => {
self.apply_generic_two_qubit(state, control, target, gate_name)?;
}
}
let mut stats = self
.stats
.lock()
.map_err(|_| QuantRS2Error::InvalidInput("Failed to acquire stats lock".to_string()))?;
stats.total_executions += 1;
stats.total_execution_time += start.elapsed();
*stats
.execution_counts
.entry(gate_name.to_string())
.or_insert(0) += 1;
Ok(())
}
/// Applies CNOT: for every index whose `control` bit is set, swaps the
/// amplitude with the one at the index that differs only in the `target` bit.
///
/// Each pair is swapped once, when visited from its lower index. The swap
/// panics if the partner index falls outside the slice.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_cnot_optimized(
    &self,
    state: &mut Array1<Complex64>,
    control: usize,
    target: usize,
) -> QuantRS2Result<()> {
    let len = state.len();
    let control_mask = 1_usize << control;
    let target_mask = 1_usize << target;
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    for idx in 0..len {
        if idx & control_mask == 0 {
            continue;
        }
        let partner = idx ^ target_mask;
        if partner > idx {
            amps.swap(idx, partner);
        }
    }
    Ok(())
}
/// Applies CZ: negates every amplitude whose index has both the `control` and
/// the `target` bit set.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_cz_optimized(
    &self,
    state: &mut Array1<Complex64>,
    control: usize,
    target: usize,
) -> QuantRS2Result<()> {
    let control_stride: usize = 1 << control;
    let target_stride: usize = 1 << target;
    let both_bits = control_stride | target_stride;
    let amplitudes = state.as_slice_mut().ok_or_else(|| {
        QuantRS2Error::InvalidInput("Failed to get mutable slice".to_string())
    })?;
    for (i, amplitude) in amplitudes.iter_mut().enumerate() {
        if i & both_bits == both_bits {
            *amplitude = -*amplitude;
        }
    }
    Ok(())
}
/// Applies SWAP: for every index whose `qubit1` and `qubit2` bits differ,
/// swaps the amplitude with the one at the index with both bits flipped.
///
/// Each pair is swapped once, when visited from its lower index. The swap
/// panics if the partner index falls outside the slice.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_swap_optimized(
    &self,
    state: &mut Array1<Complex64>,
    qubit1: usize,
    qubit2: usize,
) -> QuantRS2Result<()> {
    let len = state.len();
    let mask1 = 1_usize << qubit1;
    let mask2 = 1_usize << qubit2;
    let Some(amps) = state.as_slice_mut() else {
        return Err(QuantRS2Error::InvalidInput(
            "Failed to get mutable slice".to_string(),
        ));
    };
    let bits_differ = |idx: &usize| ((idx & mask1) != 0) != ((idx & mask2) != 0);
    for idx in (0..len).filter(bits_differ) {
        let partner = idx ^ mask1 ^ mask2;
        if partner > idx {
            amps.swap(idx, partner);
        }
    }
    Ok(())
}
/// Applies iSWAP: for every index whose `qubit1` and `qubit2` bits differ,
/// exchanges the amplitude with the one at the index with both bits flipped
/// and multiplies both by `i`.
///
/// Each pair is processed once, when visited from its lower index. Indexing
/// panics if the partner index falls outside the slice.
///
/// # Errors
///
/// Returns `InvalidInput` when `as_slice_mut` yields no slice for `state`.
fn apply_iswap_optimized(
    &self,
    state: &mut Array1<Complex64>,
    qubit1: usize,
    qubit2: usize,
) -> QuantRS2Result<()> {
    let len = state.len();
    let mask1 = 1_usize << qubit1;
    let mask2 = 1_usize << qubit2;
    let amps = match state.as_slice_mut() {
        Some(slice) => slice,
        None => {
            return Err(QuantRS2Error::InvalidInput(
                "Failed to get mutable slice".to_string(),
            ))
        }
    };
    for idx in 0..len {
        let first_set = (idx & mask1) != 0;
        let second_set = (idx & mask2) != 0;
        if first_set == second_set {
            continue;
        }
        let partner = idx ^ mask1 ^ mask2;
        if partner <= idx {
            continue;
        }
        let (here, there) = (amps[idx], amps[partner]);
        // i * there
        amps[idx] = Complex64::new(-there.im, there.re);
        // i * here
        amps[partner] = Complex64::new(-here.im, here.re);
    }
    Ok(())
}
/// Fallback for two-qubit gates without a specialised kernel.
///
/// Currently a stub: every argument is ignored, `_state` is left untouched
/// and `Ok(())` is returned unconditionally.
const fn apply_generic_two_qubit(
&self,
_state: &mut Array1<Complex64>,
_control: usize,
_target: usize,
_gate_name: &str,
) -> QuantRS2Result<()> {
Ok(())
}
/// Returns a snapshot (clone) of the current execution statistics.
///
/// # Errors
///
/// Returns `InvalidInput` when the statistics lock cannot be acquired.
pub fn get_stats(&self) -> QuantRS2Result<KernelStats> {
    match self.stats.lock() {
        Ok(guard) => Ok((*guard).clone()),
        Err(_) => Err(QuantRS2Error::InvalidInput(
            "Failed to acquire stats lock".to_string(),
        )),
    }
}
/// Replaces the execution statistics with `KernelStats::default()`.
///
/// # Errors
///
/// Returns `InvalidInput` when the statistics lock cannot be acquired.
pub fn reset_stats(&mut self) -> QuantRS2Result<()> {
    match self.stats.lock() {
        Ok(mut guard) => {
            *guard = KernelStats::default();
            Ok(())
        }
        Err(_) => Err(QuantRS2Error::InvalidInput(
            "Failed to acquire stats lock".to_string(),
        )),
    }
}
/// Lists the names of all registered single-qubit, two-qubit and fused
/// kernels, in that group order. Custom kernels are not included.
#[must_use]
pub fn get_available_kernels(&self) -> Vec<String> {
    let registry = &self.kernel_registry;
    registry
        .single_qubit_kernels
        .keys()
        .chain(registry.two_qubit_kernels.keys())
        .chain(registry.fused_kernels.keys())
        .cloned()
        .collect()
}
/// Reports whether `name` is registered as a single-qubit, two-qubit or fused
/// kernel. Custom kernels are not consulted.
#[must_use]
pub fn has_kernel(&self, name: &str) -> bool {
    let registry = &self.kernel_registry;
    if registry.single_qubit_kernels.contains_key(name) {
        return true;
    }
    if registry.two_qubit_kernels.contains_key(name) {
        return true;
    }
    registry.fused_kernels.contains_key(name)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing amplitudes.
    const TOLERANCE: f64 = 1e-10;

    /// Builds an optimizer with the default configuration.
    fn default_optimizer() -> GPUKernelOptimizer {
        GPUKernelOptimizer::new(GPUKernelConfig::default())
    }

    /// Returns a state vector of `len` amplitudes with amplitude 1 at `index`.
    fn basis_state(len: usize, index: usize) -> Array1<Complex64> {
        let mut state = Array1::from_vec(vec![Complex64::new(0.0, 0.0); len]);
        state[index] = Complex64::new(1.0, 0.0);
        state
    }

    /// Asserts that `actual` equals `re + i * im` within `TOLERANCE`.
    fn assert_amplitude(actual: Complex64, re: f64, im: f64) {
        assert!(
            (actual.re - re).abs() < TOLERANCE,
            "real part mismatch: got {actual:?}, expected ({re}, {im})"
        );
        assert!(
            (actual.im - im).abs() < TOLERANCE,
            "imaginary part mismatch: got {actual:?}, expected ({re}, {im})"
        );
    }

    #[test]
    fn test_kernel_optimizer_creation() {
        let optimizer = default_optimizer();
        assert!(!optimizer.get_available_kernels().is_empty());
    }

    #[test]
    fn test_hadamard_kernel() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        let result = optimizer.apply_single_qubit_gate(&mut state, 0, "hadamard", None);
        assert!(result.is_ok());
        let inv_sqrt2 = 1.0 / 2.0_f64.sqrt();
        assert_amplitude(state[0], inv_sqrt2, 0.0);
        assert_amplitude(state[1], inv_sqrt2, 0.0);
    }

    #[test]
    fn test_pauli_x_kernel() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        let result = optimizer.apply_single_qubit_gate(&mut state, 0, "pauli_x", None);
        assert!(result.is_ok());
        assert_amplitude(state[0], 0.0, 0.0);
        assert_amplitude(state[1], 1.0, 0.0);
    }

    #[test]
    fn test_pauli_y_kernel() {
        // Y|0> = i|1>
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        let result = optimizer.apply_single_qubit_gate(&mut state, 0, "pauli_y", None);
        assert!(result.is_ok());
        assert_amplitude(state[0], 0.0, 0.0);
        assert_amplitude(state[1], 0.0, 1.0);
    }

    #[test]
    fn test_pauli_z_kernel() {
        let mut optimizer = default_optimizer();
        let mut state =
            Array1::from_vec(vec![Complex64::new(0.5, 0.0), Complex64::new(0.5, 0.0)]);
        let result = optimizer.apply_single_qubit_gate(&mut state, 0, "pauli_z", None);
        assert!(result.is_ok());
        assert_amplitude(state[0], 0.5, 0.0);
        assert_amplitude(state[1], -0.5, 0.0);
    }

    #[test]
    fn test_rotation_z_kernel() {
        // Rz(pi)|0> = exp(-i * pi / 2)|0> = -i|0>
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        let result = optimizer.apply_single_qubit_gate(
            &mut state,
            0,
            "rotation_z",
            Some(&[std::f64::consts::PI]),
        );
        assert!(result.is_ok());
        assert_amplitude(state[0], 0.0, -1.0);
        assert_amplitude(state[1], 0.0, 0.0);
    }

    #[test]
    fn test_cnot_kernel() {
        // Index 2 has the control bit (qubit 1) set, so qubit 0 is flipped.
        let mut optimizer = default_optimizer();
        let mut state = basis_state(4, 2);
        let result = optimizer.apply_two_qubit_gate(&mut state, 1, 0, "cnot");
        assert!(result.is_ok());
        assert_amplitude(state[2], 0.0, 0.0);
        assert_amplitude(state[3], 1.0, 0.0);
    }

    #[test]
    fn test_cz_kernel() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(4, 3);
        let result = optimizer.apply_two_qubit_gate(&mut state, 1, 0, "cz");
        assert!(result.is_ok());
        assert_amplitude(state[3], -1.0, 0.0);
    }

    #[test]
    fn test_swap_kernel() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(4, 1);
        let result = optimizer.apply_two_qubit_gate(&mut state, 0, 1, "swap");
        assert!(result.is_ok());
        assert_amplitude(state[1], 0.0, 0.0);
        assert_amplitude(state[2], 1.0, 0.0);
    }

    #[test]
    fn test_iswap_kernel() {
        // The amplitude at index 1 moves to index 2 and picks up a factor of i.
        let mut optimizer = default_optimizer();
        let mut state = basis_state(4, 1);
        let result = optimizer.apply_two_qubit_gate(&mut state, 0, 1, "iswap");
        assert!(result.is_ok());
        assert_amplitude(state[1], 0.0, 0.0);
        assert_amplitude(state[2], 0.0, 1.0);
    }

    #[test]
    fn test_kernel_stats() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        optimizer
            .apply_single_qubit_gate(&mut state, 0, "hadamard", None)
            .expect("hadamard gate should apply successfully");
        optimizer
            .apply_single_qubit_gate(&mut state, 0, "pauli_x", None)
            .expect("pauli_x gate should apply successfully");
        let stats = optimizer.get_stats().expect("get_stats should succeed");
        assert_eq!(stats.total_executions, 2);
        assert_eq!(*stats.execution_counts.get("hadamard").unwrap_or(&0), 1);
        assert_eq!(*stats.execution_counts.get("pauli_x").unwrap_or(&0), 1);
        assert!(stats.execution_times.contains_key("hadamard"));
        assert!(stats.execution_times.contains_key("pauli_x"));
    }

    #[test]
    fn test_available_kernels() {
        let optimizer = default_optimizer();
        let kernels = optimizer.get_available_kernels();
        assert!(kernels.contains(&"hadamard".to_string()));
        assert!(kernels.contains(&"cnot".to_string()));
        assert!(kernels.contains(&"swap".to_string()));
    }

    #[test]
    fn test_has_kernel() {
        let optimizer = default_optimizer();
        assert!(optimizer.has_kernel("hadamard"));
        assert!(optimizer.has_kernel("cnot"));
        assert!(!optimizer.has_kernel("nonexistent"));
    }

    #[test]
    fn test_config_defaults() {
        let config = GPUKernelConfig::default();
        assert!(config.enable_warp_optimization);
        assert!(config.enable_shared_memory);
        assert_eq!(config.block_size, 256);
        assert!(config.enable_kernel_fusion);
        assert_eq!(config.max_fusion_length, 8);
    }

    #[test]
    fn test_reset_stats() {
        let mut optimizer = default_optimizer();
        let mut state = basis_state(2, 0);
        optimizer
            .apply_single_qubit_gate(&mut state, 0, "hadamard", None)
            .expect("hadamard gate should apply successfully");
        optimizer.reset_stats().expect("reset_stats should succeed");
        let stats = optimizer.get_stats().expect("get_stats should succeed");
        assert_eq!(stats.total_executions, 0);
        assert!(stats.execution_counts.is_empty());
    }

    #[test]
    fn test_multiple_qubit_operations() {
        // H on qubit 0 followed by CNOT(0 -> 1) leaves equal weight on
        // indices 0 and 3.
        let mut optimizer = default_optimizer();
        let mut state = basis_state(8, 0);
        optimizer
            .apply_single_qubit_gate(&mut state, 0, "hadamard", None)
            .expect("hadamard gate should apply successfully");
        optimizer
            .apply_two_qubit_gate(&mut state, 0, 1, "cnot")
            .expect("cnot gate should apply successfully");
        let total_prob: f64 = state.iter().map(|a| (a * a.conj()).re).sum();
        assert!((total_prob - 1.0).abs() < TOLERANCE);
        let inv_sqrt2 = 1.0 / 2.0_f64.sqrt();
        assert_amplitude(state[0], inv_sqrt2, 0.0);
        assert_amplitude(state[3], inv_sqrt2, 0.0);
        assert_amplitude(state[1], 0.0, 0.0);
    }
}