use crate::{
    error::{QuantRS2Error, QuantRS2Result},
    tensor_network::{Tensor, TensorNetwork},
};
use scirs2_core::Complex64;
use std::{
    collections::HashMap,
    fmt,
    sync::{Arc, Mutex, RwLock},
};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GpuBackend {
CUDA,
OpenCL,
ROCm,
WebGPU,
Metal,
}
impl fmt::Display for GpuBackend {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
GpuBackend::CUDA => write!(f, "CUDA"),
GpuBackend::OpenCL => write!(f, "OpenCL"),
GpuBackend::ROCm => write!(f, "ROCm"),
GpuBackend::WebGPU => write!(f, "WebGPU"),
GpuBackend::Metal => write!(f, "Metal"),
}
}
}
#[derive(Debug, Clone)]
pub struct GpuDevice {
pub id: u32,
pub name: String,
pub backend: GpuBackend,
pub memory_size: usize,
pub compute_units: u32,
pub max_work_group_size: usize,
pub supports_double_precision: bool,
pub is_available: bool,
}
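/// Handle to a device-side allocation in the mock GPU layer.
///
/// A minimal host/device round-trip sketch (device 0 on the mock CUDA
/// backend; real transfers would require an actual driver):
///
/// ```ignore
/// let mut buffer = GpuBuffer::new(0, GpuBackend::CUDA, 1024, false)?;
/// let data = vec![Complex64::new(1.0, 0.0); 4];
/// buffer.copy_from_host(&data)?;
/// let mut out = vec![Complex64::new(0.0, 0.0); 4];
/// buffer.copy_to_host(&mut out)?;
/// ```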
#[derive(Debug)]
pub struct GpuBuffer {
buffer_id: u64,
size: usize,
device_id: u32,
backend: GpuBackend,
is_pinned: bool,
}
impl GpuBuffer {
pub fn new(device_id: u32, backend: GpuBackend, size: usize, pinned: bool) -> QuantRS2Result<Self> {
let buffer_id = Self::allocate_buffer(device_id, backend, size, pinned)?;
Ok(Self {
buffer_id,
size,
device_id,
backend,
is_pinned: pinned,
})
}
fn allocate_buffer(device_id: u32, backend: GpuBackend, size: usize, _pinned: bool) -> QuantRS2Result<u64> {
static NEXT_BUFFER_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1);
if size == 0 {
return Err(QuantRS2Error::InvalidParameter("Buffer size cannot be zero".to_string()));
}
match backend {
GpuBackend::CUDA => {
if device_id >= 8 {
return Err(QuantRS2Error::InvalidParameter("CUDA device ID out of range".to_string()));
}
}
GpuBackend::OpenCL | GpuBackend::ROCm => {
if device_id >= 16 {
return Err(QuantRS2Error::InvalidParameter("Device ID out of range".to_string()));
}
}
_ => {}
}
Ok(NEXT_BUFFER_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed))
}
pub fn copy_from_host(&mut self, data: &[Complex64]) -> QuantRS2Result<()> {
if data.len() * std::mem::size_of::<Complex64>() > self.size {
return Err(QuantRS2Error::InvalidParameter("Data size exceeds buffer capacity".to_string()));
}
println!("Copying {} complex numbers to GPU buffer {} on {} device {}",
data.len(), self.buffer_id, self.backend, self.device_id);
Ok(())
}
pub fn copy_to_host(&self, data: &mut [Complex64]) -> QuantRS2Result<()> {
if data.len() * std::mem::size_of::<Complex64>() > self.size {
return Err(QuantRS2Error::InvalidParameter("Data size exceeds buffer capacity".to_string()));
}
println!("Copying {} complex numbers from GPU buffer {} on {} device {}",
data.len(), self.buffer_id, self.backend, self.device_id);
Ok(())
}
}
impl Drop for GpuBuffer {
fn drop(&mut self) {
println!("Deallocating GPU buffer {} on {} device {}",
self.buffer_id, self.backend, self.device_id);
}
}
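/// Owns the device list, compiled kernels, and pooled buffer allocator for
/// one backend.
///
/// A sketch of the compile/allocate/execute cycle (the kernel source is a
/// stand-in; the mock `compile_kernel` accepts any string):
///
/// ```ignore
/// let mut context = GpuContext::new(GpuBackend::CUDA)?;
/// context.set_active_device(0)?;
/// context.compile_kernel("noop", "__global__ void noop() {}")?;
/// let buffer = context.allocate_buffer(1024)?;
/// context.execute_kernel("noop", &[&buffer], &[])?;
/// context.deallocate_buffer(buffer)?;
/// ```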
#[derive(Debug)]
pub struct GpuContext {
devices: Vec<GpuDevice>,
active_device: Option<u32>,
backend: GpuBackend,
kernels: Arc<RwLock<HashMap<String, CompiledKernel>>>,
memory_pool: Arc<Mutex<GpuMemoryPool>>,
}
#[derive(Debug, Clone)]
pub struct CompiledKernel {
kernel_id: u64,
name: String,
source_code: String,
device_id: u32,
backend: GpuBackend,
work_group_size: usize,
}
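/// Free-list allocator that recycles returned buffers instead of asking the
/// backend for a fresh allocation on every request.
///
/// Reuse is first-fit on `device_id` plus `size >= requested`, so in the
/// sketch below the second request is served from the free list (`pool` is a
/// `GpuMemoryPool` for the same device):
///
/// ```ignore
/// let first = pool.allocate(0, 2048)?;
/// pool.deallocate(first);
/// let second = pool.allocate(0, 1024)?; // recycles the 2048-byte buffer
/// ```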
#[derive(Debug)]
pub struct GpuMemoryPool {
free_buffers: Vec<GpuBuffer>,
allocated_bytes: usize,
peak_allocation: usize,
backend: GpuBackend,
}
impl GpuMemoryPool {
fn new(backend: GpuBackend) -> Self {
Self {
free_buffers: Vec::new(),
allocated_bytes: 0,
peak_allocation: 0,
backend,
}
}
    fn allocate(&mut self, device_id: u32, size: usize) -> QuantRS2Result<GpuBuffer> {
        if let Some(pos) = self
            .free_buffers
            .iter()
            .position(|buf| buf.device_id == device_id && buf.size >= size)
        {
            // Recycle a free buffer; count its bytes as allocated again so
            // the accounting stays symmetric with deallocate().
            let buffer = self.free_buffers.remove(pos);
            self.allocated_bytes += buffer.size;
            self.peak_allocation = self.peak_allocation.max(self.allocated_bytes);
            return Ok(buffer);
        }
        let buffer = GpuBuffer::new(device_id, self.backend, size, false)?;
        self.allocated_bytes += size;
        self.peak_allocation = self.peak_allocation.max(self.allocated_bytes);
        Ok(buffer)
    }
fn deallocate(&mut self, buffer: GpuBuffer) {
self.allocated_bytes = self.allocated_bytes.saturating_sub(buffer.size);
self.free_buffers.push(buffer);
}
}
impl GpuContext {
pub fn new(backend: GpuBackend) -> QuantRS2Result<Self> {
let devices = Self::discover_devices(backend)?;
if devices.is_empty() {
return Err(QuantRS2Error::NoHardwareAvailable(
format!("No {} devices found", backend)
));
}
Ok(Self {
active_device: Some(devices[0].id),
devices,
backend,
kernels: Arc::new(RwLock::new(HashMap::new())),
memory_pool: Arc::new(Mutex::new(GpuMemoryPool::new(backend))),
})
}
fn discover_devices(backend: GpuBackend) -> QuantRS2Result<Vec<GpuDevice>> {
match backend {
GpuBackend::CUDA => Ok(vec![
GpuDevice {
id: 0,
name: "NVIDIA GeForce RTX 4090".to_string(),
backend,
                    memory_size: 24 * 1024 * 1024 * 1024, // 24 GiB
                    compute_units: 128,
max_work_group_size: 1024,
supports_double_precision: true,
is_available: true,
},
GpuDevice {
id: 1,
name: "NVIDIA A100".to_string(),
backend,
                    memory_size: 80 * 1024 * 1024 * 1024, // 80 GiB
                    compute_units: 108,
max_work_group_size: 1024,
supports_double_precision: true,
is_available: true,
},
]),
GpuBackend::OpenCL => Ok(vec![
GpuDevice {
id: 0,
name: "Intel UHD Graphics".to_string(),
backend,
                    memory_size: 8 * 1024 * 1024 * 1024, // 8 GiB
                    compute_units: 24,
max_work_group_size: 256,
supports_double_precision: false,
is_available: true,
},
]),
GpuBackend::ROCm => Ok(vec![
GpuDevice {
id: 0,
name: "AMD Radeon RX 7900 XTX".to_string(),
backend,
                    memory_size: 24 * 1024 * 1024 * 1024, // 24 GiB
                    compute_units: 96,
max_work_group_size: 1024,
supports_double_precision: true,
is_available: true,
},
]),
_ => Ok(vec![]),
}
}
pub fn set_active_device(&mut self, device_id: u32) -> QuantRS2Result<()> {
if !self.devices.iter().any(|d| d.id == device_id && d.is_available) {
return Err(QuantRS2Error::InvalidParameter(
format!("Device {} not available", device_id)
));
}
self.active_device = Some(device_id);
Ok(())
}
pub fn active_device(&self) -> Option<&GpuDevice> {
self.active_device.and_then(|id|
self.devices.iter().find(|d| d.id == id))
}
pub fn compile_kernel(&self, name: &str, source: &str) -> QuantRS2Result<()> {
let device_id = self.active_device.ok_or_else(||
QuantRS2Error::InvalidOperation("No active device".to_string()))?;
let kernel = CompiledKernel {
kernel_id: rand::random(),
name: name.to_string(),
source_code: source.to_string(),
device_id,
backend: self.backend,
            work_group_size: 256, // default; a real backend would query device limits
        };
println!("Compiling kernel '{}' for {} device {}", name, self.backend, device_id);
self.kernels
.write()
.map_err(|e| QuantRS2Error::LockPoisoned(format!("Kernels RwLock poisoned: {e}")))?
.insert(name.to_string(), kernel);
Ok(())
}
pub fn execute_kernel(&self, name: &str, buffers: &[&GpuBuffer], params: &[f64]) -> QuantRS2Result<()> {
let kernels = self
.kernels
.read()
.map_err(|e| QuantRS2Error::LockPoisoned(format!("Kernels RwLock poisoned: {e}")))?;
        let _kernel = kernels.get(name).ok_or_else(|| {
            QuantRS2Error::InvalidOperation(format!("Kernel '{}' not found", name))
        })?;
println!("Executing kernel '{}' with {} buffers and {} parameters",
name, buffers.len(), params.len());
        // Simulate kernel execution latency for the mock backend.
        std::thread::sleep(std::time::Duration::from_millis(1));
Ok(())
}
pub fn allocate_buffer(&self, size: usize) -> QuantRS2Result<GpuBuffer> {
let device_id = self.active_device.ok_or_else(||
QuantRS2Error::InvalidOperation("No active device".to_string()))?;
self.memory_pool
.lock()
.map_err(|e| QuantRS2Error::LockPoisoned(format!("Memory pool Mutex poisoned: {e}")))?
.allocate(device_id, size)
}
pub fn deallocate_buffer(&self, buffer: GpuBuffer) -> QuantRS2Result<()> {
self.memory_pool
.lock()
.map_err(|e| QuantRS2Error::LockPoisoned(format!("Memory pool Mutex poisoned: {e}")))?
.deallocate(buffer);
Ok(())
}
}
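/// State-vector simulator that keeps all 2^n amplitudes in GPU buffers and
/// applies gates through the pre-compiled kernels.
///
/// A minimal sketch applying a Hadamard to qubit 0 of |00> (gate matrices
/// are passed as row-major slices; 4 entries for single-qubit gates, 16 for
/// two-qubit gates):
///
/// ```ignore
/// let context = Arc::new(GpuContext::new(GpuBackend::CUDA)?);
/// let mut sim = GpuStateVectorSimulator::new(context, 2)?;
/// let mut zero = vec![Complex64::new(0.0, 0.0); 4];
/// zero[0] = Complex64::new(1.0, 0.0);
/// sim.initialize_state(&zero)?;
/// let h = 1.0 / 2.0_f64.sqrt();
/// let hadamard = [
///     Complex64::new(h, 0.0), Complex64::new(h, 0.0),
///     Complex64::new(h, 0.0), Complex64::new(-h, 0.0),
/// ];
/// sim.apply_single_qubit_gate(0, &hadamard)?;
/// let probs = sim.get_probabilities()?;
/// ```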
#[derive(Debug)]
pub struct GpuStateVectorSimulator {
context: Arc<GpuContext>,
state_buffer: Option<GpuBuffer>,
temp_buffer: Option<GpuBuffer>,
num_qubits: usize,
state_size: usize,
}
impl GpuStateVectorSimulator {
pub fn new(context: Arc<GpuContext>, num_qubits: usize) -> QuantRS2Result<Self> {
if num_qubits > 50 {
return Err(QuantRS2Error::UnsupportedQubits(
num_qubits,
"Maximum 50 qubits supported for state vector simulation".to_string()
));
}
        let state_size = 1 << num_qubits;
context.compile_kernel("apply_single_qubit_gate", SINGLE_QUBIT_GATE_KERNEL)?;
context.compile_kernel("apply_two_qubit_gate", TWO_QUBIT_GATE_KERNEL)?;
context.compile_kernel("apply_phase_rotation", PHASE_ROTATION_KERNEL)?;
context.compile_kernel("compute_expectation", EXPECTATION_VALUE_KERNEL)?;
Ok(Self {
context,
state_buffer: None,
temp_buffer: None,
num_qubits,
state_size,
})
}
pub fn initialize_state(&mut self, initial_state: &[Complex64]) -> QuantRS2Result<()> {
if initial_state.len() != self.state_size {
return Err(QuantRS2Error::InvalidInput(
format!("Expected {} amplitudes, got {}", self.state_size, initial_state.len())
));
}
let buffer_size = self.state_size * std::mem::size_of::<Complex64>();
let mut state_buffer = self.context.allocate_buffer(buffer_size)?;
state_buffer.copy_from_host(initial_state)?;
self.state_buffer = Some(state_buffer);
let temp_buffer = self.context.allocate_buffer(buffer_size)?;
self.temp_buffer = Some(temp_buffer);
Ok(())
}
pub fn apply_single_qubit_gate(&mut self, qubit: usize, gate_matrix: &[Complex64]) -> QuantRS2Result<()> {
if gate_matrix.len() != 4 {
return Err(QuantRS2Error::InvalidInput("Single-qubit gate must be 2x2".to_string()));
}
if qubit >= self.num_qubits {
return Err(QuantRS2Error::InvalidQubitId(qubit as u32));
}
let state_buffer = self.state_buffer.as_ref().ok_or_else(||
QuantRS2Error::InvalidOperation("State not initialized".to_string()))?;
let gate_params = vec![
gate_matrix[0].re, gate_matrix[0].im,
gate_matrix[1].re, gate_matrix[1].im,
gate_matrix[2].re, gate_matrix[2].im,
gate_matrix[3].re, gate_matrix[3].im,
qubit as f64,
];
self.context.execute_kernel("apply_single_qubit_gate", &[state_buffer], &gate_params)?;
Ok(())
}
pub fn apply_two_qubit_gate(&mut self, control: usize, target: usize, gate_matrix: &[Complex64]) -> QuantRS2Result<()> {
if gate_matrix.len() != 16 {
return Err(QuantRS2Error::InvalidInput("Two-qubit gate must be 4x4".to_string()));
}
        if control >= self.num_qubits || target >= self.num_qubits {
            return Err(QuantRS2Error::InvalidQubitId(control.max(target) as u32));
        }
        if control == target {
            return Err(QuantRS2Error::InvalidInput(
                "Control and target qubits must be distinct".to_string(),
            ));
        }
let state_buffer = self.state_buffer.as_ref().ok_or_else(||
QuantRS2Error::InvalidOperation("State not initialized".to_string()))?;
let mut gate_params = Vec::with_capacity(34);
for elem in gate_matrix {
gate_params.push(elem.re);
gate_params.push(elem.im);
}
gate_params.push(control as f64);
gate_params.push(target as f64);
self.context.execute_kernel("apply_two_qubit_gate", &[state_buffer], &gate_params)?;
Ok(())
}
pub fn apply_global_phase(&mut self, phase: f64) -> QuantRS2Result<()> {
let state_buffer = self.state_buffer.as_ref().ok_or_else(||
QuantRS2Error::InvalidOperation("State not initialized".to_string()))?;
self.context.execute_kernel("apply_phase_rotation", &[state_buffer], &[phase])?;
Ok(())
}
pub fn expectation_value(&self, pauli_string: &str) -> QuantRS2Result<f64> {
if pauli_string.len() != self.num_qubits {
return Err(QuantRS2Error::InvalidInput(
format!("Pauli string length {} must match number of qubits {}",
pauli_string.len(), self.num_qubits)
));
}
let state_buffer = self.state_buffer.as_ref().ok_or_else(||
QuantRS2Error::InvalidOperation("State not initialized".to_string()))?;
let mut pauli_encoding = Vec::new();
for c in pauli_string.chars() {
match c {
'I' => pauli_encoding.push(0.0),
'X' => pauli_encoding.push(1.0),
'Y' => pauli_encoding.push(2.0),
'Z' => pauli_encoding.push(3.0),
_ => return Err(QuantRS2Error::InvalidInput(
format!("Invalid Pauli operator: {}", c)
)),
}
}
self.context.execute_kernel("compute_expectation", &[state_buffer], &pauli_encoding)?;
        // Placeholder result; a real implementation would read the reduction
        // output back from the device.
        Ok(0.5)
    }
pub fn get_state(&self) -> QuantRS2Result<Vec<Complex64>> {
let state_buffer = self.state_buffer.as_ref().ok_or_else(||
QuantRS2Error::InvalidOperation("State not initialized".to_string()))?;
let mut state = vec![Complex64::new(0.0, 0.0); self.state_size];
state_buffer.copy_to_host(&mut state)?;
Ok(state)
}
pub fn get_probabilities(&self) -> QuantRS2Result<Vec<f64>> {
let state = self.get_state()?;
Ok(state.iter().map(|amp| amp.norm_sqr()).collect())
}
}
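/// Uploads tensors to device buffers and dispatches contraction and
/// decomposition kernels.
///
/// Typical flow (a sketch; `tensor_a` and `tensor_b` stand for any two
/// `Tensor` values, contracting index 1 of the first with index 0 of the
/// second):
///
/// ```ignore
/// let mut contractor = GpuTensorNetworkContractor::new(context)?;
/// contractor.upload_tensor(&tensor_a)?;
/// contractor.upload_tensor(&tensor_b)?;
/// let result = contractor.contract_tensors(tensor_a.id, tensor_b.id, &[(1, 0)])?;
/// ```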
#[derive(Debug)]
pub struct GpuTensorNetworkContractor {
context: Arc<GpuContext>,
tensor_buffers: HashMap<usize, GpuBuffer>,
contraction_cache: HashMap<String, Vec<Complex64>>,
}
impl GpuTensorNetworkContractor {
pub fn new(context: Arc<GpuContext>) -> QuantRS2Result<Self> {
context.compile_kernel("contract_tensors", TENSOR_CONTRACTION_KERNEL)?;
context.compile_kernel("tensor_svd", TENSOR_SVD_KERNEL)?;
context.compile_kernel("tensor_qr", TENSOR_QR_KERNEL)?;
Ok(Self {
context,
tensor_buffers: HashMap::new(),
contraction_cache: HashMap::new(),
})
}
pub fn upload_tensor(&mut self, tensor: &Tensor) -> QuantRS2Result<()> {
let data_size = tensor.data.len() * std::mem::size_of::<Complex64>();
let mut buffer = self.context.allocate_buffer(data_size)?;
let flattened: Vec<Complex64> = tensor.data.iter().cloned().collect();
buffer.copy_from_host(&flattened)?;
self.tensor_buffers.insert(tensor.id, buffer);
Ok(())
}
pub fn contract_tensors(&mut self, tensor1_id: usize, tensor2_id: usize,
contract_indices: &[(usize, usize)]) -> QuantRS2Result<Vec<Complex64>> {
let buffer1 = self.tensor_buffers.get(&tensor1_id).ok_or_else(||
QuantRS2Error::InvalidOperation(format!("Tensor {} not found on GPU", tensor1_id)))?;
let buffer2 = self.tensor_buffers.get(&tensor2_id).ok_or_else(||
QuantRS2Error::InvalidOperation(format!("Tensor {} not found on GPU", tensor2_id)))?;
let mut params = Vec::new();
for (i, j) in contract_indices {
params.push(*i as f64);
params.push(*j as f64);
}
        self.context.execute_kernel("contract_tensors", &[buffer1, buffer2], &params)?;
        // Placeholder result; a real implementation would copy the contracted
        // tensor back from the device.
        Ok(vec![Complex64::new(1.0, 0.0)])
    }
pub fn tensor_svd(&self, tensor_id: usize, split_index: usize) -> QuantRS2Result<(Vec<Complex64>, Vec<f64>, Vec<Complex64>)> {
let buffer = self.tensor_buffers.get(&tensor_id).ok_or_else(||
QuantRS2Error::InvalidOperation(format!("Tensor {} not found on GPU", tensor_id)))?;
self.context.execute_kernel("tensor_svd", &[buffer], &[split_index as f64])?;
        Ok((
            vec![Complex64::new(1.0, 0.0)], // U (placeholder)
            vec![1.0],                      // singular values (placeholder)
            vec![Complex64::new(1.0, 0.0)], // V^T (placeholder)
        ))
    }
    pub fn optimize_contraction_order(&self, network: &TensorNetwork) -> QuantRS2Result<Vec<(usize, usize)>> {
        // Placeholder: contracts tensors pairwise in key order. A real
        // implementation would search for a low-cost contraction path.
        let tensor_ids: Vec<usize> = network.tensors.keys().copied().collect();
        let mut order = Vec::new();
        for i in 0..tensor_ids.len().saturating_sub(1) {
            order.push((tensor_ids[i], tensor_ids[i + 1]));
        }
        Ok(order)
    }
}
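/// Accumulates per-operation timings, memory high-water marks, and launch
/// counts for profiling.
///
/// Averages are arithmetic means over all recorded samples:
///
/// ```ignore
/// let mut monitor = GpuPerformanceMonitor::new();
/// monitor.record_operation("contract", 2.0);
/// monitor.record_operation("contract", 4.0);
/// assert_eq!(monitor.get_average_time("contract"), Some(3.0));
/// ```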
#[derive(Debug, Clone)]
pub struct GpuPerformanceMonitor {
operation_times: HashMap<String, Vec<f64>>,
memory_usage: Vec<usize>,
kernel_launches: u64,
memory_transfers: u64,
}
impl GpuPerformanceMonitor {
pub fn new() -> Self {
Self {
operation_times: HashMap::new(),
memory_usage: Vec::new(),
kernel_launches: 0,
memory_transfers: 0,
}
}
pub fn record_operation(&mut self, name: &str, duration_ms: f64) {
        self.operation_times.entry(name.to_string()).or_default().push(duration_ms);
}
pub fn record_memory_usage(&mut self, bytes: usize) {
self.memory_usage.push(bytes);
}
pub fn record_kernel_launch(&mut self) {
self.kernel_launches += 1;
}
pub fn record_memory_transfer(&mut self) {
self.memory_transfers += 1;
}
pub fn get_average_time(&self, operation: &str) -> Option<f64> {
self.operation_times.get(operation).map(|times| {
times.iter().sum::<f64>() / times.len() as f64
})
}
pub fn get_peak_memory_usage(&self) -> Option<usize> {
self.memory_usage.iter().max().cloned()
}
pub fn get_stats(&self) -> GpuPerformanceStats {
GpuPerformanceStats {
total_kernel_launches: self.kernel_launches,
total_memory_transfers: self.memory_transfers,
peak_memory_usage: self.get_peak_memory_usage().unwrap_or(0),
operation_averages: self.operation_times.iter()
.map(|(name, times)| (name.clone(), times.iter().sum::<f64>() / times.len() as f64))
.collect(),
}
}
}
#[derive(Debug, Clone)]
pub struct GpuPerformanceStats {
pub total_kernel_launches: u64,
pub total_memory_transfers: u64,
pub peak_memory_usage: usize,
pub operation_averages: HashMap<String, f64>,
}
const SINGLE_QUBIT_GATE_KERNEL: &str = r#"
__global__ void apply_single_qubit_gate(Complex* state, Complex* gate, int qubit, int n_qubits) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    int total_states = 1 << n_qubits;
    if (idx >= total_states / 2) return;
    int qubit_mask = 1 << qubit;
    // Map each thread index to a unique pair of basis states that differ
    // only in the target qubit bit: keep the bits below the qubit, shift
    // the bits at and above it up by one.
    int low = idx & (qubit_mask - 1);
    int high = (idx & ~(qubit_mask - 1)) << 1;
    int state_0 = high | low;
    int state_1 = state_0 | qubit_mask;
    Complex amp_0 = state[state_0];
    Complex amp_1 = state[state_1];
    state[state_0] = gate[0] * amp_0 + gate[1] * amp_1;
    state[state_1] = gate[2] * amp_0 + gate[3] * amp_1;
}
"#;
const TWO_QUBIT_GATE_KERNEL: &str = r#"
__global__ void apply_two_qubit_gate(Complex* state, Complex* gate, int control, int target, int n_qubits) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int total_states = 1 << n_qubits;
if (idx >= total_states / 4) return;
// Implementation would handle two-qubit gate application
}
"#;
const PHASE_ROTATION_KERNEL: &str = r#"
__global__ void apply_phase_rotation(Complex* state, double phase, int size) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx >= size) return;
Complex phase_factor = make_cuDoubleComplex(cos(phase), sin(phase));
state[idx] = cuCmul(state[idx], phase_factor);
}
"#;
const EXPECTATION_VALUE_KERNEL: &str = r#"
__global__ void compute_expectation(Complex* state, double* paulis, double* result, int n_qubits) {
// Implementation would compute expectation value of Pauli string
}
"#;
const TENSOR_CONTRACTION_KERNEL: &str = r#"
__global__ void contract_tensors(Complex* tensor1, Complex* tensor2,
int* indices, Complex* result,
int* shape1, int* shape2) {
// Implementation would perform tensor contraction
}
"#;
const TENSOR_SVD_KERNEL: &str = r#"
__global__ void tensor_svd(Complex* tensor, Complex* U, double* S, Complex* Vt,
int rows, int cols) {
// Implementation would perform SVD decomposition
}
"#;
const TENSOR_QR_KERNEL: &str = r#"
__global__ void tensor_qr(Complex* tensor, Complex* Q, Complex* R,
int rows, int cols) {
// Implementation would perform QR decomposition
}
"#;
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::IxDyn;
#[test]
fn test_gpu_context_creation() {
let context = GpuContext::new(GpuBackend::CUDA);
assert!(context.is_ok());
let context = context.expect("CUDA context should be created successfully");
assert!(!context.devices.is_empty());
assert!(context.active_device.is_some());
}
#[test]
fn test_gpu_buffer_allocation() {
let context =
GpuContext::new(GpuBackend::CUDA).expect("CUDA context should be created successfully");
let buffer = context.allocate_buffer(1024);
assert!(buffer.is_ok());
let buffer = buffer.expect("Buffer should be allocated successfully");
assert_eq!(buffer.size, 1024);
}
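    #[test]
    fn test_buffer_allocation_validation() {
        // The mock allocator rejects zero-sized buffers and out-of-range
        // device IDs (>= 8 for the CUDA backend).
        assert!(GpuBuffer::new(0, GpuBackend::CUDA, 0, false).is_err());
        assert!(GpuBuffer::new(8, GpuBackend::CUDA, 1024, false).is_err());
        assert!(GpuBuffer::new(0, GpuBackend::CUDA, 1024, false).is_ok());
    }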
#[test]
fn test_state_vector_simulator() {
let context = Arc::new(
GpuContext::new(GpuBackend::CUDA).expect("CUDA context should be created successfully"),
);
let mut simulator = GpuStateVectorSimulator::new(context, 3)
.expect("State vector simulator should be created successfully");
let initial_state = vec![
Complex64::new(1.0, 0.0), Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0), Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0), Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0), Complex64::new(0.0, 0.0),
];
assert!(simulator.initialize_state(&initial_state).is_ok());
let hadamard = vec![
Complex64::new(1.0/2.0_f64.sqrt(), 0.0), Complex64::new(1.0/2.0_f64.sqrt(), 0.0),
Complex64::new(1.0/2.0_f64.sqrt(), 0.0), Complex64::new(-1.0/2.0_f64.sqrt(), 0.0),
];
assert!(simulator.apply_single_qubit_gate(0, &hadamard).is_ok());
}
#[test]
fn test_tensor_network_contractor() {
let context = Arc::new(
GpuContext::new(GpuBackend::CUDA).expect("CUDA context should be created successfully"),
);
let mut contractor = GpuTensorNetworkContractor::new(context)
.expect("Tensor network contractor should be created successfully");
let data = scirs2_core::ndarray::Array::from_shape_vec(
IxDyn(&[2, 2]),
vec![
Complex64::new(1.0, 0.0),
Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0),
Complex64::new(1.0, 0.0),
],
)
.expect("Array should be created from valid shape and data");
let tensor = Tensor::new(0, data, vec!["i".to_string(), "j".to_string()]);
assert!(contractor.upload_tensor(&tensor).is_ok());
}
#[test]
fn test_performance_monitor() {
let mut monitor = GpuPerformanceMonitor::new();
monitor.record_operation("gate_application", 1.5);
monitor.record_operation("gate_application", 2.0);
monitor.record_memory_usage(1024);
monitor.record_kernel_launch();
assert_eq!(monitor.get_average_time("gate_application"), Some(1.75));
assert_eq!(monitor.get_peak_memory_usage(), Some(1024));
let stats = monitor.get_stats();
assert_eq!(stats.total_kernel_launches, 1);
}
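    #[test]
    fn test_memory_pool_reuse() {
        // Deallocated buffers should be recycled by the first-fit free list
        // rather than triggering a fresh allocation.
        let mut pool = GpuMemoryPool::new(GpuBackend::CUDA);
        let buffer = pool.allocate(0, 2048).expect("allocation should succeed");
        let first_id = buffer.buffer_id;
        pool.deallocate(buffer);
        let reused = pool.allocate(0, 1024).expect("reuse should succeed");
        assert_eq!(reused.buffer_id, first_id);
        assert_eq!(reused.size, 2048);
    }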
#[test]
fn test_unsupported_backend() {
        // WebGPU has no devices in the mock discovery list, so context
        // creation must fail with NoHardwareAvailable.
        let context = GpuContext::new(GpuBackend::WebGPU);
        assert!(matches!(
            context,
            Err(QuantRS2Error::NoHardwareAvailable(_))
        ));
    }
#[test]
fn test_invalid_qubit_operations() {
let context = Arc::new(
GpuContext::new(GpuBackend::CUDA).expect("CUDA context should be created successfully"),
);
let mut simulator = GpuStateVectorSimulator::new(context, 2)
.expect("State vector simulator should be created successfully");
let initial_state = vec![
Complex64::new(1.0, 0.0),
Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0),
Complex64::new(0.0, 0.0),
];
simulator
.initialize_state(&initial_state)
.expect("State should be initialized successfully");
let hadamard = vec![
Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
Complex64::new(-1.0 / 2.0_f64.sqrt(), 0.0),
];
let result = simulator.apply_single_qubit_gate(5, &hadamard);
assert!(result.is_err());
let err = result.expect_err("Expected InvalidQubitId error for qubit 5");
assert!(matches!(err, QuantRS2Error::InvalidQubitId(_)));
}
#[test]
fn test_expectation_value_calculation() {
let context = Arc::new(
GpuContext::new(GpuBackend::CUDA).expect("CUDA context should be created successfully"),
);
let simulator = GpuStateVectorSimulator::new(context, 3)
.expect("State vector simulator should be created successfully");
let result = simulator.expectation_value("XYZ");
assert!(result.is_ok());
let result = simulator.expectation_value("XY");
assert!(result.is_err());
let result = simulator.expectation_value("ABC");
assert!(result.is_err());
}
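    #[test]
    fn test_single_qubit_kernel_index_pairing() {
        // Host-side replica of the index math in SINGLE_QUBIT_GATE_KERNEL:
        // every thread index must map to a distinct (state_0, state_1) pair
        // differing only in the target qubit bit, covering the whole register.
        let n_qubits = 4;
        let qubit = 2;
        let qubit_mask = 1usize << qubit;
        let mut touched = vec![false; 1 << n_qubits];
        for idx in 0..(1usize << (n_qubits - 1)) {
            let low = idx & (qubit_mask - 1);
            let high = (idx & !(qubit_mask - 1)) << 1;
            let state_0 = high | low;
            let state_1 = state_0 | qubit_mask;
            assert_eq!(state_0 & qubit_mask, 0);
            assert!(!touched[state_0] && !touched[state_1]);
            touched[state_0] = true;
            touched[state_1] = true;
        }
        assert!(touched.iter().all(|&t| t));
    }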
}