use crate::error::{QuantRS2Error, QuantRS2Result};
use crate::gpu::large_scale_simulation::GpuBackend;
use crate::gpu::{GpuBackend as QuantumGpuBackend, GpuBuffer, GpuKernel};
use crate::gpu_stubs::SciRS2GpuConfig;
use scirs2_core::Complex64;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
/// Placeholder type for a GPU device handle.
///
/// It is currently the unit type, so a stored "device" carries no data.
#[cfg(feature = "gpu")]
type GpuDevice = ();
/// Performance counters reported by the SciRS2 buffer and kernel adapters.
#[derive(Debug, Clone)]
pub struct SciRS2GpuMetrics {
/// Number of kernel invocations recorded so far.
pub kernel_executions: usize,
/// Exponentially weighted moving average of kernel run time, in microseconds.
pub avg_kernel_time_us: f64,
/// Memory bandwidth utilization as a fraction (the report prints it times 100 as a percentage).
pub memory_bandwidth_utilization: f64,
/// Compute utilization as a fraction (printed as a percentage in the report).
pub compute_utilization: f64,
/// Cache hit rate as a fraction (printed as a percentage in the report).
pub cache_hit_rate: f64,
/// Memory usage in bytes.
pub memory_usage_bytes: usize,
}
/// Buffer of `Complex64` amplitudes that implements `GpuBuffer` on top of a host-side `Vec`.
pub struct SciRS2BufferAdapter {
/// Number of `Complex64` elements in the buffer (an element count, not a byte count).
size: usize,
/// Device handle; stays `None` unless `initialize_gpu` obtains a device.
#[cfg(feature = "gpu")]
device: Option<Arc<GpuDevice>>,
/// Amplitude storage, created with exactly `size` elements.
data: Vec<Complex64>,
/// Configuration this buffer was created with.
config: SciRS2GpuConfig,
/// Metrics record belonging to this buffer, guarded by a mutex.
metrics: Arc<Mutex<SciRS2GpuMetrics>>,
}
impl SciRS2BufferAdapter {
    /// Creates a zero-filled buffer of `size` amplitudes using
    /// `SciRS2GpuConfig::default()`.
    pub fn new(size: usize) -> Self {
        let default_config = SciRS2GpuConfig::default();
        Self::with_config(size, default_config)
    }

    /// Creates a zero-filled buffer of `size` amplitudes with an explicit
    /// configuration.
    ///
    /// The metrics start at zero except for `memory_usage_bytes`, which is
    /// set to the byte size of the amplitude storage. No device is attached.
    pub fn with_config(size: usize, config: SciRS2GpuConfig) -> Self {
        let initial_metrics = SciRS2GpuMetrics {
            kernel_executions: 0,
            avg_kernel_time_us: 0.0,
            memory_bandwidth_utilization: 0.0,
            compute_utilization: 0.0,
            cache_hit_rate: 0.0,
            memory_usage_bytes: size * std::mem::size_of::<Complex64>(),
        };
        let zero = Complex64::new(0.0, 0.0);
        Self {
            size,
            #[cfg(feature = "gpu")]
            device: None,
            data: vec![zero; size],
            config,
            metrics: Arc::new(Mutex::new(initial_metrics)),
        }
    }

    /// Tries to attach a GPU device to this buffer.
    ///
    /// A failure to obtain a device is only logged to stderr; the buffer
    /// then keeps working without a device and `Ok(())` is still returned.
    #[cfg(feature = "gpu")]
    pub fn initialize_gpu(&mut self) -> QuantRS2Result<()> {
        match get_scirs2_gpu_device() {
            Ok(device) => self.device = Some(Arc::new(device)),
            Err(e) => eprintln!("GPU initialization failed, falling back to CPU: {e}"),
        }
        Ok(())
    }

    /// Returns a snapshot of this buffer's metrics.
    ///
    /// If the metrics mutex is poisoned, a zeroed record carrying only the
    /// buffer's byte size is returned instead.
    pub fn get_metrics(&self) -> SciRS2GpuMetrics {
        match self.metrics.lock() {
            Ok(guard) => guard.clone(),
            Err(_) => SciRS2GpuMetrics {
                kernel_executions: 0,
                avg_kernel_time_us: 0.0,
                memory_bandwidth_utilization: 0.0,
                compute_utilization: 0.0,
                cache_hit_rate: 0.0,
                memory_usage_bytes: self.size * std::mem::size_of::<Complex64>(),
            },
        }
    }

    /// Reports whether a device handle is attached to this buffer.
    #[cfg(feature = "gpu")]
    #[allow(clippy::missing_const_for_fn)]
    pub fn is_gpu_active(&self) -> bool {
        self.device.is_some()
    }

    /// Always `false`: the crate was built without the `gpu` feature.
    #[cfg(not(feature = "gpu"))]
    pub const fn is_gpu_active(&self) -> bool {
        false
    }
}
impl GpuBuffer for SciRS2BufferAdapter {
    /// Returns the buffer size in bytes (element count times the size of
    /// one `Complex64`).
    fn size(&self) -> usize {
        let element_bytes = std::mem::size_of::<Complex64>();
        self.size * element_bytes
    }

    /// Copies `data` into the buffer.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `data` does not hold exactly as many
    /// elements as the buffer.
    fn upload(&mut self, data: &[Complex64]) -> QuantRS2Result<()> {
        let provided = data.len();
        if provided != self.size {
            return Err(QuantRS2Error::InvalidInput(format!(
                "Data size {} doesn't match buffer size {}",
                provided, self.size
            )));
        }
        // Both the device path and the host path store the data the same way.
        self.data.copy_from_slice(data);
        // Only the device path refreshes the memory-usage metric.
        #[cfg(feature = "gpu")]
        if self.device.is_some() {
            if let Ok(mut metrics) = self.metrics.lock() {
                metrics.memory_usage_bytes = self.size * std::mem::size_of::<Complex64>();
            }
        }
        Ok(())
    }

    /// Copies the buffer contents into `data`.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `data` does not hold exactly as many
    /// elements as the buffer.
    fn download(&self, data: &mut [Complex64]) -> QuantRS2Result<()> {
        if data.len() == self.size {
            // The device path and the host path perform the same copy.
            data.copy_from_slice(&self.data);
            Ok(())
        } else {
            Err(QuantRS2Error::InvalidInput(format!(
                "Data size {} doesn't match buffer size {}",
                data.len(),
                self.size
            )))
        }
    }

    /// Nothing to wait for: succeeds immediately with or without a device.
    fn sync(&self) -> QuantRS2Result<()> {
        Ok(())
    }

    /// Exposes the adapter as `Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Mutable counterpart of `as_any`.
    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}
/// Kernel executor that implements `GpuKernel` by operating on `SciRS2BufferAdapter` data.
pub struct SciRS2KernelAdapter {
/// Configuration the adapter was created with.
config: SciRS2GpuConfig,
/// Kernel sources keyed by kernel name; filled by `compile_kernel` when caching is enabled.
kernel_cache: HashMap<String, String>,
/// Execution metrics updated after each kernel run, guarded by a mutex.
metrics: Arc<Mutex<SciRS2GpuMetrics>>,
/// Device handle; stays `None` unless `initialize_gpu` obtains a device.
#[cfg(feature = "gpu")]
device: Option<Arc<GpuDevice>>,
}
impl SciRS2KernelAdapter {
    /// Creates a kernel adapter that uses `SciRS2GpuConfig::default()`.
    pub fn new() -> Self {
        Self::with_config(SciRS2GpuConfig::default())
    }

    /// Creates a kernel adapter with an explicit configuration.
    ///
    /// The kernel cache starts empty and no device is attached. The
    /// utilization and cache-hit metrics start at the fixed values 0.8, 0.7
    /// and 0.9; nothing in this impl updates them afterwards.
    pub fn with_config(config: SciRS2GpuConfig) -> Self {
        let metrics = Arc::new(Mutex::new(SciRS2GpuMetrics {
            kernel_executions: 0,
            avg_kernel_time_us: 0.0,
            memory_bandwidth_utilization: 0.8,
            compute_utilization: 0.7,
            cache_hit_rate: 0.9,
            memory_usage_bytes: 0,
        }));
        Self {
            config,
            kernel_cache: HashMap::new(),
            metrics,
            #[cfg(feature = "gpu")]
            device: None,
        }
    }

    /// Tries to attach a GPU device to this adapter.
    ///
    /// A failure to obtain a device is only logged to stderr and `Ok(())`
    /// is still returned, so the adapter keeps running on the CPU path.
    #[cfg(feature = "gpu")]
    pub fn initialize_gpu(&mut self) -> QuantRS2Result<()> {
        match get_scirs2_gpu_device() {
            Ok(device) => {
                self.device = Some(Arc::new(device));
                Ok(())
            }
            Err(e) => {
                eprintln!("GPU initialization failed, using CPU fallback: {e}");
                Ok(())
            }
        }
    }

    /// Stores `kernel_source` under `kernel_name` when kernel caching is
    /// enabled in the configuration; otherwise does nothing.
    fn compile_kernel(&mut self, kernel_name: &str, kernel_source: &str) -> QuantRS2Result<()> {
        if self.config.enable_kernel_cache {
            self.kernel_cache
                .insert(kernel_name.to_string(), kernel_source.to_string());
        }
        Ok(())
    }

    /// Returns `2^n_qubits` after checking that the shift cannot overflow
    /// and that a buffer of `buffer_len` amplitudes can hold the state.
    fn state_dimension(n_qubits: usize, buffer_len: usize) -> QuantRS2Result<usize> {
        if n_qubits >= usize::BITS as usize {
            return Err(QuantRS2Error::InvalidInput(format!(
                "Number of qubits {n_qubits} is too large for a state vector"
            )));
        }
        let dimension = 1usize << n_qubits;
        if dimension > buffer_len {
            return Err(QuantRS2Error::InvalidInput(format!(
                "State of {n_qubits} qubits needs {dimension} amplitudes but buffer holds {buffer_len}"
            )));
        }
        Ok(dimension)
    }

    /// Returns the index bit of `qubit`, rejecting indices outside the
    /// register. Callers must have validated `n_qubits` with
    /// `state_dimension` first so the shift is in range.
    fn qubit_bit(qubit: crate::qubit::QubitId, n_qubits: usize) -> QuantRS2Result<usize> {
        let index = qubit.0 as usize;
        if index >= n_qubits {
            return Err(QuantRS2Error::InvalidInput(format!(
                "Qubit index {index} is out of range for {n_qubits} qubits"
            )));
        }
        Ok(1usize << index)
    }

    /// Counts one kernel execution and folds its run time into the moving
    /// average (weight 0.1 for the newest sample). A poisoned metrics mutex
    /// is ignored: metrics are best-effort.
    fn record_kernel_execution(&self, started: std::time::Instant) {
        if let Ok(mut metrics) = self.metrics.lock() {
            metrics.kernel_executions += 1;
            let duration_us = started.elapsed().as_nanos() as f64 / 1000.0;
            let alpha = 0.1;
            metrics.avg_kernel_time_us =
                alpha * duration_us + (1.0 - alpha) * metrics.avg_kernel_time_us;
        }
    }

    /// Applies a 2x2 gate (row-major `gate_matrix`) to `qubit` of the state
    /// stored in `state`.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `state` is not a `SciRS2BufferAdapter`,
    /// when the buffer is too small for `n_qubits` qubits, or when `qubit`
    /// lies outside the register.
    fn execute_single_qubit_kernel(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 4],
        qubit: crate::qubit::QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        let start = std::time::Instant::now();
        let buffer = state
            .as_any_mut()
            .downcast_mut::<SciRS2BufferAdapter>()
            .ok_or_else(|| QuantRS2Error::InvalidInput("Invalid buffer type".to_string()))?;
        let size = Self::state_dimension(n_qubits, buffer.data.len())?;
        let target_bit = Self::qubit_bit(qubit, n_qubits)?;
        let [m00, m01, m10, m11] = *gate_matrix;
        for low in 0..size {
            // Visit each amplitude pair once, from the index whose target bit is clear.
            if low & target_bit != 0 {
                continue;
            }
            let high = low | target_bit;
            let amp_0 = buffer.data[low];
            let amp_1 = buffer.data[high];
            buffer.data[low] = m00 * amp_0 + m01 * amp_1;
            buffer.data[high] = m10 * amp_0 + m11 * amp_1;
        }
        self.record_kernel_execution(start);
        Ok(())
    }

    /// Applies a 4x4 gate (row-major `gate_matrix`) to the pair
    /// (`control`, `target`). Matrix index `2 * control_bit + target_bit`
    /// selects the basis state of the pair.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `state` is not a `SciRS2BufferAdapter`,
    /// when the buffer is too small for `n_qubits` qubits, when either
    /// qubit lies outside the register, or when `control == target`.
    fn execute_two_qubit_kernel(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 16],
        control: crate::qubit::QubitId,
        target: crate::qubit::QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        let start = std::time::Instant::now();
        let buffer = state
            .as_any_mut()
            .downcast_mut::<SciRS2BufferAdapter>()
            .ok_or_else(|| QuantRS2Error::InvalidInput("Invalid buffer type".to_string()))?;
        let size = Self::state_dimension(n_qubits, buffer.data.len())?;
        let control_bit = Self::qubit_bit(control, n_qubits)?;
        let target_bit = Self::qubit_bit(target, n_qubits)?;
        if control_bit == target_bit {
            // With identical qubits the four indices collapse to two and
            // the update would silently corrupt the state.
            return Err(QuantRS2Error::InvalidInput(
                "Control and target qubits must be different".to_string(),
            ));
        }
        let pair_mask = control_bit | target_bit;
        for base in 0..size {
            // Each group of four amplitudes is handled once, from the index
            // where both the control and the target bit are clear.
            if base & pair_mask != 0 {
                continue;
            }
            let indices = [
                base,
                base | target_bit,
                base | control_bit,
                base | pair_mask,
            ];
            // Snapshot the old amplitudes before any of them is overwritten.
            let amps = indices.map(|idx| buffer.data[idx]);
            for (row, &state_idx) in indices.iter().enumerate() {
                let mut new_amp = Complex64::new(0.0, 0.0);
                for (col, old) in amps.iter().enumerate() {
                    new_amp += gate_matrix[row * 4 + col] * *old;
                }
                buffer.data[state_idx] = new_amp;
            }
        }
        self.record_kernel_execution(start);
        Ok(())
    }
}
impl GpuKernel for SciRS2KernelAdapter {
    /// Applies a 2x2 gate to `qubit`; delegates to the adapter's
    /// single-qubit kernel.
    fn apply_single_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 4],
        qubit: crate::qubit::QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        self.execute_single_qubit_kernel(state, gate_matrix, qubit, n_qubits)
    }

    /// Applies a 4x4 gate to (`control`, `target`); delegates to the
    /// adapter's two-qubit kernel.
    fn apply_two_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 16],
        control: crate::qubit::QubitId,
        target: crate::qubit::QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        self.execute_two_qubit_kernel(state, gate_matrix, control, target, n_qubits)
    }

    /// Applies a `2^k x 2^k` gate to the `k` qubits listed in `qubits`.
    ///
    /// Bit `p` of a matrix row/column index corresponds to `qubits[p]`.
    /// Amplitudes are processed in independent groups of `2^k`: each group
    /// is snapshotted before it is written, so no update ever reads an
    /// amplitude that was already overwritten.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `state` is not a `SciRS2BufferAdapter`,
    /// when the buffer is too small for `n_qubits` qubits, when a qubit is
    /// out of range or listed twice, or when the matrix is not `2^k x 2^k`.
    fn apply_multi_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &scirs2_core::ndarray::Array2<Complex64>,
        qubits: &[crate::qubit::QubitId],
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        use std::time::Instant;
        let start = Instant::now();
        let buffer = state
            .as_any_mut()
            .downcast_mut::<SciRS2BufferAdapter>()
            .ok_or_else(|| QuantRS2Error::InvalidInput("Invalid buffer type".to_string()))?;
        let qubit_bits = validated_qubit_bits(qubits, n_qubits, buffer.data.len())?;
        let gate_size = 1usize << qubit_bits.len();
        if gate_matrix.nrows() != gate_size || gate_matrix.ncols() != gate_size {
            return Err(QuantRS2Error::InvalidInput(
                "Gate matrix size doesn't match number of qubits".to_string(),
            ));
        }
        let state_size = 1usize << n_qubits;
        let target_mask = qubit_bits.iter().fold(0usize, |mask, bit| mask | *bit);
        // offsets[m] is the state-index contribution of matrix index m.
        let offsets: Vec<usize> = (0..gate_size)
            .map(|compact| scatter_bits(compact, &qubit_bits))
            .collect();
        let mut gathered = vec![Complex64::new(0.0, 0.0); gate_size];
        for base in 0..state_size {
            // One group per index whose target bits are all clear.
            if base & target_mask != 0 {
                continue;
            }
            for (slot, offset) in gathered.iter_mut().zip(offsets.iter()) {
                *slot = buffer.data[base | *offset];
            }
            for (row, offset) in offsets.iter().enumerate() {
                let mut new_amplitude = Complex64::new(0.0, 0.0);
                for (col, old_amplitude) in gathered.iter().enumerate() {
                    new_amplitude += gate_matrix[[row, col]] * *old_amplitude;
                }
                buffer.data[base | *offset] = new_amplitude;
            }
        }
        if let Ok(mut metrics) = self.metrics.lock() {
            metrics.kernel_executions += 1;
            let duration = start.elapsed();
            let duration_us = duration.as_nanos() as f64 / 1000.0;
            let alpha = 0.1;
            metrics.avg_kernel_time_us =
                alpha * duration_us + (1.0 - alpha) * metrics.avg_kernel_time_us;
        }
        Ok(())
    }

    /// Samples a measurement outcome for `qubit`.
    ///
    /// Returns `(outcome, probability_of_that_outcome)`, where `outcome` is
    /// `true` for the `1` result. The buffer is only read: the state is not
    /// collapsed here.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `state` is not a `SciRS2BufferAdapter`.
    fn measure_qubit(
        &self,
        state: &dyn GpuBuffer,
        qubit: crate::qubit::QubitId,
        _n_qubits: usize,
    ) -> QuantRS2Result<(bool, f64)> {
        use scirs2_core::random::prelude::*;
        let buffer = state
            .as_any()
            .downcast_ref::<SciRS2BufferAdapter>()
            .ok_or_else(|| QuantRS2Error::InvalidInput("Invalid buffer type".to_string()))?;
        let qubit_bit = 1 << qubit.0;
        // Probability of reading 1: total weight of basis states with the bit set.
        let mut prob_one = 0.0;
        for (i, amplitude) in buffer.data.iter().enumerate() {
            if i & qubit_bit != 0 {
                prob_one += amplitude.norm_sqr();
            }
        }
        let outcome = thread_rng().random::<f64>() < prob_one;
        Ok((outcome, if outcome { prob_one } else { 1.0 - prob_one }))
    }

    /// Computes the real part of `<psi| O |psi>` for an observable acting
    /// on the qubits listed in `qubits` (identity on all other qubits).
    ///
    /// Bit `p` of an observable row/column index corresponds to
    /// `qubits[p]`. Only basis-state pairs that agree on every qubit outside
    /// `qubits` contribute, which is what the implicit identity requires.
    ///
    /// # Errors
    /// Returns `InvalidInput` when `state` is not a `SciRS2BufferAdapter`,
    /// when the buffer is too small for `n_qubits` qubits, when a qubit is
    /// out of range or listed twice, or when the observable is not
    /// `2^k x 2^k`.
    fn expectation_value(
        &self,
        state: &dyn GpuBuffer,
        observable: &scirs2_core::ndarray::Array2<Complex64>,
        qubits: &[crate::qubit::QubitId],
        n_qubits: usize,
    ) -> QuantRS2Result<f64> {
        let buffer = state
            .as_any()
            .downcast_ref::<SciRS2BufferAdapter>()
            .ok_or_else(|| QuantRS2Error::InvalidInput("Invalid buffer type".to_string()))?;
        let qubit_bits = validated_qubit_bits(qubits, n_qubits, buffer.data.len())?;
        let obs_size = 1usize << qubit_bits.len();
        if observable.nrows() != obs_size || observable.ncols() != obs_size {
            return Err(QuantRS2Error::InvalidInput(
                "Observable matrix size doesn't match number of qubits".to_string(),
            ));
        }
        let state_size = 1usize << n_qubits;
        let target_mask = qubit_bits.iter().fold(0usize, |mask, bit| mask | *bit);
        let offsets: Vec<usize> = (0..obs_size)
            .map(|compact| scatter_bits(compact, &qubit_bits))
            .collect();
        let mut expectation = 0.0;
        for i in 0..state_size {
            let bra = buffer.data[i].conj();
            // Keep the spectator bits of i; only the observed bits vary in j.
            let base = i & !target_mask;
            let mut obs_i = 0usize;
            for (pos, bit) in qubit_bits.iter().enumerate() {
                if i & *bit != 0 {
                    obs_i |= 1 << pos;
                }
            }
            for (obs_j, offset) in offsets.iter().enumerate() {
                let matrix_element = observable[[obs_i, obs_j]];
                expectation += (bra * matrix_element * buffer.data[base | *offset]).re;
            }
        }
        Ok(expectation)
    }
}

/// Validates a register description and returns the index bit of every
/// listed qubit, in the order given.
///
/// Rejects registers whose dimension overflows `usize` or exceeds
/// `buffer_len`, qubit indices outside the register, and repeated qubits.
fn validated_qubit_bits(
    qubits: &[crate::qubit::QubitId],
    n_qubits: usize,
    buffer_len: usize,
) -> QuantRS2Result<Vec<usize>> {
    if n_qubits >= usize::BITS as usize {
        return Err(QuantRS2Error::InvalidInput(format!(
            "Number of qubits {n_qubits} is too large for a state vector"
        )));
    }
    let state_size = 1usize << n_qubits;
    if state_size > buffer_len {
        return Err(QuantRS2Error::InvalidInput(format!(
            "State of {n_qubits} qubits needs {state_size} amplitudes but buffer holds {buffer_len}"
        )));
    }
    let mut seen_mask = 0usize;
    let mut bits = Vec::with_capacity(qubits.len());
    for qubit in qubits {
        let index = qubit.0 as usize;
        if index >= n_qubits {
            return Err(QuantRS2Error::InvalidInput(format!(
                "Qubit index {index} is out of range for {n_qubits} qubits"
            )));
        }
        let bit = 1usize << index;
        if seen_mask & bit != 0 {
            return Err(QuantRS2Error::InvalidInput(format!(
                "Qubit index {index} is listed more than once"
            )));
        }
        seen_mask |= bit;
        bits.push(bit);
    }
    Ok(bits)
}

/// Spreads the low bits of `compact` onto the state-index bits in `bits`:
/// bit `p` of `compact` selects `bits[p]`.
fn scatter_bits(compact: usize, bits: &[usize]) -> usize {
    let mut full = 0;
    for (pos, bit) in bits.iter().enumerate() {
        if (compact >> pos) & 1 == 1 {
            full |= *bit;
        }
    }
    full
}
/// Backend that bundles a kernel adapter with its configuration and a description string.
pub struct SciRS2GpuBackend {
/// Kernel adapter handed out by `kernel()`.
kernel: SciRS2KernelAdapter,
/// Configuration used for the kernel and cloned into every buffer this backend allocates.
config: SciRS2GpuConfig,
/// Human-readable summary built once in `with_config`.
device_info: String,
}
impl SciRS2GpuBackend {
    /// Creates a backend that uses `SciRS2GpuConfig::default()`.
    pub fn new() -> QuantRS2Result<Self> {
        let default_config = SciRS2GpuConfig::default();
        Self::with_config(default_config)
    }

    /// Creates a backend with an explicit configuration.
    ///
    /// With the `gpu` feature enabled, device initialization is attempted
    /// on the kernel adapter and its result is discarded.
    pub fn with_config(config: SciRS2GpuConfig) -> QuantRS2Result<Self> {
        let mut kernel = SciRS2KernelAdapter::with_config(config.clone());
        #[cfg(feature = "gpu")]
        let _ = kernel.initialize_gpu();
        let memory_mb = &config.max_memory_mb;
        let simd_level = &config.simd_level;
        let cache_enabled = &config.enable_kernel_cache;
        let device_info = format!(
            "SciRS2 GPU Backend - Memory: {}MB, SIMD Level: {}, Cache: {}",
            memory_mb, simd_level, cache_enabled
        );
        Ok(Self {
            kernel,
            config,
            device_info,
        })
    }

    /// Returns a snapshot of the kernel adapter's metrics, or an all-zero
    /// record when the metrics mutex is poisoned.
    pub fn get_performance_metrics(&self) -> SciRS2GpuMetrics {
        match self.kernel.metrics.lock() {
            Ok(guard) => guard.clone(),
            Err(_) => SciRS2GpuMetrics {
                kernel_executions: 0,
                avg_kernel_time_us: 0.0,
                memory_bandwidth_utilization: 0.0,
                compute_utilization: 0.0,
                cache_hit_rate: 0.0,
                memory_usage_bytes: 0,
            },
        }
    }

    /// Formats the current metrics as a multi-line report.
    ///
    /// Utilization figures are printed as percentages and memory usage in
    /// mebibytes.
    pub fn optimization_report(&self) -> String {
        let SciRS2GpuMetrics {
            kernel_executions,
            avg_kernel_time_us,
            memory_bandwidth_utilization,
            compute_utilization,
            cache_hit_rate,
            memory_usage_bytes,
        } = self.get_performance_metrics();
        let bandwidth_percent = memory_bandwidth_utilization * 100.0;
        let compute_percent = compute_utilization * 100.0;
        let cache_percent = cache_hit_rate * 100.0;
        let memory_mb = memory_usage_bytes as f64 / (1024.0 * 1024.0);
        format!(
            "SciRS2 GPU Optimization Report:\n\
             - Kernel Executions: {}\n\
             - Average Kernel Time: {:.2} μs\n\
             - Memory Bandwidth: {:.1}%\n\
             - Compute Utilization: {:.1}%\n\
             - Cache Hit Rate: {:.1}%\n\
             - Memory Usage: {:.2} MB",
            kernel_executions,
            avg_kernel_time_us,
            bandwidth_percent,
            compute_percent,
            cache_percent,
            memory_mb
        )
    }
}
impl QuantumGpuBackend for SciRS2GpuBackend {
fn is_available() -> bool
where
Self: Sized,
{
is_gpu_available()
}
fn name(&self) -> &'static str {
"SciRS2_GPU"
}
fn device_info(&self) -> String {
self.device_info.clone()
}
fn allocate_state_vector(&self, n_qubits: usize) -> QuantRS2Result<Box<dyn GpuBuffer>> {
let size = 1 << n_qubits;
let mut buffer = SciRS2BufferAdapter::with_config(size, self.config.clone());
#[cfg(feature = "gpu")]
let _ = buffer.initialize_gpu();
Ok(Box::new(buffer))
}
fn allocate_density_matrix(&self, n_qubits: usize) -> QuantRS2Result<Box<dyn GpuBuffer>> {
let size = 1 << (2 * n_qubits); let mut buffer = SciRS2BufferAdapter::with_config(size, self.config.clone());
#[cfg(feature = "gpu")]
let _ = buffer.initialize_gpu();
Ok(Box::new(buffer))
}
fn kernel(&self) -> &dyn GpuKernel {
&self.kernel
}
}
/// Placeholder for acquiring a SciRS2 GPU device.
///
/// The function currently always returns
/// `QuantRS2Error::BackendExecutionFailed("SciRS2 GPU API not yet integrated")`.
/// The backend list and the device description built below are never read.
#[cfg(feature = "gpu")]
pub fn get_scirs2_gpu_device() -> QuantRS2Result<GpuDevice> {
// Candidate backends; Metal is only listed when compiling for macOS. Unused so far.
let _backends = vec![
GpuBackend::CUDA,
#[cfg(target_os = "macos")]
GpuBackend::Metal,
GpuBackend::OpenCL,
];
use crate::gpu::large_scale_simulation::GpuDevice as LargeScaleGpuDevice;
// Hard-coded device description; constructed but not returned.
// NOTE(review): memory_size looks like 8 GiB expressed in bytes — confirm against the field's unit.
let _device = LargeScaleGpuDevice {
id: 0,
name: "SciRS2 GPU Device".to_string(),
backend: GpuBackend::CUDA, memory_size: 8 * 1024 * 1024 * 1024, compute_units: 80,
max_work_group_size: 1024,
supports_double_precision: true,
is_available: true,
};
// No real device can be produced yet, so every call reports failure.
Err(QuantRS2Error::BackendExecutionFailed(
"SciRS2 GPU API not yet integrated".to_string(),
))
}
/// Stores `kernel_source` under `name` in a process-wide kernel registry,
/// replacing any source previously registered under the same name.
///
/// The registry is a function-local static, so nothing outside this
/// function can read it.
///
/// A poisoned registry mutex is recovered rather than ignored: the map only
/// holds plain strings, so its contents stay usable after a panic in
/// another thread, and the registration is no longer silently dropped
/// while still reporting success.
#[cfg(feature = "gpu")]
pub fn register_quantum_kernel(name: &str, kernel_source: &str) -> QuantRS2Result<()> {
    use std::sync::{OnceLock, PoisonError};
    static KERNEL_REGISTRY: OnceLock<std::sync::Mutex<HashMap<String, String>>> = OnceLock::new();
    let registry = KERNEL_REGISTRY.get_or_init(|| std::sync::Mutex::new(HashMap::new()));
    registry
        .lock()
        .unwrap_or_else(PoisonError::into_inner)
        .insert(name.to_string(), kernel_source.to_string());
    Ok(())
}
/// Accepts a precompiled kernel binary and reports success.
///
/// Both arguments are ignored; nothing is stored.
pub const fn register_compiled_kernel(name: &str, kernel_binary: &[u8]) -> QuantRS2Result<()> {
    let _ = name;
    let _ = kernel_binary;
    Ok(())
}
/// Reports whether the crate was compiled with the `gpu` feature.
///
/// This is a compile-time answer; no hardware is probed.
pub const fn is_gpu_available() -> bool {
    cfg!(feature = "gpu")
}
/// Stateless factory with associated functions that build `SciRS2GpuBackend` instances.
pub struct SciRS2GpuFactory;
impl SciRS2GpuFactory {
/// Builds a backend with the default configuration.
pub fn create_best() -> QuantRS2Result<SciRS2GpuBackend> {
    let backend = SciRS2GpuBackend::new()?;
    Ok(backend)
}
/// Builds a backend from the caller-supplied configuration.
pub fn create_with_config(config: SciRS2GpuConfig) -> QuantRS2Result<SciRS2GpuBackend> {
    let backend = SciRS2GpuBackend::with_config(config)?;
    Ok(backend)
}
pub fn create_qml_optimized() -> QuantRS2Result<SciRS2GpuBackend> {
let mut config = SciRS2GpuConfig::default();
config.simd_level = 3; config.max_memory_mb = 4096; config.compilation_flags.push("-DQML_OPTIMIZE".to_string());
SciRS2GpuBackend::with_config(config)
}
pub fn create_algorithm_optimized() -> QuantRS2Result<SciRS2GpuBackend> {
let mut config = SciRS2GpuConfig::default();
config.simd_level = 2; config.enable_load_balancing = true;
config
.compilation_flags
.push("-DALGORITHM_OPTIMIZE".to_string());
SciRS2GpuBackend::with_config(config)
}
/// Lists the backend names selected at compile time.
///
/// With the `gpu` feature the list is `CUDA`, `Metal` (macOS targets only)
/// and `OpenCL`; without it the single entry is `CPU_Fallback`.
pub fn available_backends() -> Vec<String> {
    let names: &[&str] = if cfg!(feature = "gpu") {
        if cfg!(target_os = "macos") {
            &["CUDA", "Metal", "OpenCL"]
        } else {
            &["CUDA", "OpenCL"]
        }
    } else {
        &["CPU_Fallback"]
    };
    names.iter().map(|name| name.to_string()).collect()
}
}
/// Collects a small key/value description of the GPU setup.
///
/// Always contains `available_backends` (comma-separated) and
/// `primary_device`. Builds without the `gpu` feature additionally report
/// `gpu_support` as `Disabled`.
pub fn get_gpu_system_info() -> HashMap<String, String> {
    let backend_list = SciRS2GpuFactory::available_backends().join(", ");
    let mut info = HashMap::new();
    info.insert("available_backends".to_string(), backend_list);
    #[cfg(feature = "gpu")]
    {
        // The primary device is the GPU only if a device can be obtained.
        let primary = if get_scirs2_gpu_device().is_ok() {
            "GPU"
        } else {
            "CPU"
        };
        info.insert("primary_device".to_string(), primary.to_string());
    }
    #[cfg(not(feature = "gpu"))]
    {
        info.insert("primary_device".to_string(), "CPU".to_string());
        info.insert("gpu_support".to_string(), "Disabled".to_string());
    }
    info
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: querying GPU availability must not panic.
    #[test]
    fn test_gpu_availability_check() {
        let _ = is_gpu_available();
    }

    /// A buffer remembers the element count it was created with.
    #[test]
    fn test_buffer_adapter_creation() {
        let element_count = 1024;
        let adapter = SciRS2BufferAdapter::new(element_count);
        assert_eq!(adapter.size, element_count);
    }

    /// A buffer keeps the configuration it was given.
    #[test]
    fn test_buffer_adapter_with_config() {
        let custom = SciRS2GpuConfig {
            max_memory_mb: 512,
            simd_level: 1,
            ..Default::default()
        };
        let adapter = SciRS2BufferAdapter::with_config(256, custom);
        assert_eq!(adapter.size, 256);
        assert_eq!(adapter.config.max_memory_mb, 512);
        assert_eq!(adapter.config.simd_level, 1);
    }

    /// A new kernel adapter starts with an empty kernel cache.
    #[test]
    fn test_kernel_adapter_creation() {
        assert!(SciRS2KernelAdapter::new().kernel_cache.is_empty());
    }

    /// A default backend reports its name and a non-empty description.
    #[test]
    fn test_scirs2_gpu_backend_creation() {
        let backend = SciRS2GpuBackend::new()
            .expect("Failed to create SciRS2 GPU backend in test_scirs2_gpu_backend_creation");
        assert_eq!(backend.name(), "SciRS2_GPU");
        assert!(!backend.device_info().is_empty());
    }

    /// Uploaded data is read back unchanged.
    #[test]
    fn test_buffer_upload_download() {
        let data: Vec<Complex64> = [(1.0, 0.0), (0.0, 1.0), (-1.0, 0.0), (0.0, -1.0)]
            .iter()
            .map(|&(re, im)| Complex64::new(re, im))
            .collect();
        let mut buffer = SciRS2BufferAdapter::new(4);
        buffer
            .upload(&data)
            .expect("Failed to upload data in test_buffer_upload_download");
        let mut downloaded = vec![Complex64::new(0.0, 0.0); 4];
        buffer
            .download(&mut downloaded)
            .expect("Failed to download data in test_buffer_upload_download");
        for (expected, actual) in data.iter().zip(&downloaded) {
            assert!((expected - actual).norm() < 1e-10);
        }
    }

    /// An X gate on qubit 0 maps |00> to the basis state with index 1.
    #[test]
    fn test_kernel_execution() {
        let zero = Complex64::new(0.0, 0.0);
        let one = Complex64::new(1.0, 0.0);
        let kernel = SciRS2KernelAdapter::new();
        let mut buffer = SciRS2BufferAdapter::new(4);
        let initial_state = vec![one, zero, zero, zero];
        buffer
            .upload(&initial_state)
            .expect("Failed to upload initial state in test_kernel_execution");
        let x_gate = [zero, one, one, zero];
        kernel
            .apply_single_qubit_gate(
                &mut buffer as &mut dyn GpuBuffer,
                &x_gate,
                crate::qubit::QubitId(0),
                2,
            )
            .expect("Failed to apply single qubit gate in test_kernel_execution");
        let mut result = vec![zero; 4];
        buffer
            .download(&mut result)
            .expect("Failed to download result in test_kernel_execution");
        let expected = [zero, one, zero, zero];
        for (actual, wanted) in result.iter().zip(expected.iter()) {
            assert!((actual - wanted).norm() < 1e-10);
        }
    }

    /// The factory builds a backend and always lists at least one backend name.
    #[test]
    fn test_gpu_factory() {
        let backend = SciRS2GpuFactory::create_best()
            .expect("Failed to create best GPU backend in test_gpu_factory");
        assert_eq!(backend.name(), "SciRS2_GPU");
        assert!(!SciRS2GpuFactory::available_backends().is_empty());
    }

    /// The QML preset sets SIMD level, memory limit and its compilation flag.
    #[test]
    fn test_qml_optimized_backend() {
        let backend = SciRS2GpuFactory::create_qml_optimized()
            .expect("Failed to create QML-optimized backend in test_qml_optimized_backend");
        let config = &backend.config;
        assert_eq!(config.simd_level, 3);
        assert_eq!(config.max_memory_mb, 4096);
        assert!(config
            .compilation_flags
            .iter()
            .any(|flag| flag == "-DQML_OPTIMIZE"));
    }

    /// The system info map always carries the two basic keys.
    #[test]
    fn test_system_info() {
        let info = get_gpu_system_info();
        for key in ["available_backends", "primary_device"].iter() {
            assert!(info.contains_key(*key));
        }
    }

    /// A fresh backend has executed no kernels and produces a titled report.
    #[test]
    fn test_performance_metrics() {
        let backend =
            SciRS2GpuBackend::new().expect("Failed to create backend in test_performance_metrics");
        assert_eq!(backend.get_performance_metrics().kernel_executions, 0);
        let report = backend.optimization_report();
        assert!(report.contains("SciRS2 GPU Optimization Report"));
    }

    /// A fully specified configuration is preserved by the backend.
    #[test]
    fn test_config_validation() {
        let full_config = SciRS2GpuConfig {
            device_id: 0,
            memory_pool_size: 1024 * 1024 * 1024,
            enable_profiling: false,
            enable_async: true,
            enable_kernel_cache: true,
            max_memory_mb: 1024,
            simd_level: 2,
            enable_load_balancing: true,
            compilation_flags: vec!["-O3".to_string()],
        };
        let backend = SciRS2GpuBackend::with_config(full_config)
            .expect("Failed to create backend with config in test_config_validation");
        assert_eq!(backend.config.max_memory_mb, 1024);
        assert_eq!(backend.config.simd_level, 2);
        assert!(backend.config.enable_kernel_cache);
    }
}