use crate::error::{MLError, Result};
use crate::simulator_backends::{
BackendCapabilities, DynamicCircuit, GradientMethod, Observable, SimulationResult,
SimulatorBackend,
};
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::Complex64;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
use quantrs2_sim::gpu::{GpuStateVectorSimulator, SciRS2GpuStateVectorSimulator};
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
use quantrs2_circuit::prelude::Circuit;
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
use quantrs2_core::prelude::{GateOp, QubitId};
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
pub struct GPUBackend {
device_id: usize,
max_qubits: usize,
simulators: Arc<Mutex<HashMap<usize, Arc<SciRS2GpuStateVectorSimulator>>>>,
metrics: Arc<Mutex<GPUMetrics>>,
memory_pool: Arc<GPUMemoryPool>,
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
#[derive(Debug)]
struct GPUMetrics {
total_circuits_executed: usize,
total_gpu_time_ms: f64,
total_memory_allocated_mb: f64,
cache_hits: usize,
cache_misses: usize,
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
#[derive(Debug)]
struct GPUMemoryPool {
buffers: Mutex<HashMap<usize, Vec<Vec<Complex64>>>>,
max_buffer_size: usize,
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
impl std::fmt::Debug for GPUBackend {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("GPUBackend")
.field("device_id", &self.device_id)
.field("max_qubits", &self.max_qubits)
.field("enable_metrics", &true)
.finish()
}
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
impl GPUBackend {
pub fn new(device_id: usize, max_qubits: usize) -> Result<Self> {
if !SciRS2GpuStateVectorSimulator::is_available() {
return Err(MLError::NotSupported(
"GPU not available on this system".to_string(),
));
}
if max_qubits > 30 {
return Err(MLError::InvalidInput(
"GPU backend limited to 30 qubits due to memory constraints".to_string(),
));
}
Ok(Self {
device_id,
max_qubits,
simulators: Arc::new(Mutex::new(HashMap::new())),
metrics: Arc::new(Mutex::new(GPUMetrics {
total_circuits_executed: 0,
total_gpu_time_ms: 0.0,
total_memory_allocated_mb: 0.0,
cache_hits: 0,
cache_misses: 0,
})),
memory_pool: Arc::new(GPUMemoryPool::new(max_qubits)),
})
}
fn get_simulator(&self, num_qubits: usize) -> Result<Arc<SciRS2GpuStateVectorSimulator>> {
let mut simulators = self.simulators.lock().expect("simulators mutex poisoned");
let mut metrics = self.metrics.lock().expect("metrics mutex poisoned");
if let Some(simulator) = simulators.get(&num_qubits) {
metrics.cache_hits += 1;
Ok(Arc::clone(simulator))
} else {
metrics.cache_misses += 1;
let simulator = SciRS2GpuStateVectorSimulator::new().map_err(|e| {
MLError::ComputationError(format!("Failed to create GPU simulator: {}", e))
})?;
let simulator_arc = Arc::new(simulator);
simulators.insert(num_qubits, Arc::clone(&simulator_arc));
let memory_mb = (1 << num_qubits) * 16 / (1024 * 1024); metrics.total_memory_allocated_mb += memory_mb as f64;
Ok(simulator_arc)
}
}
fn execute_circuit_typed<const N: usize>(
&self,
circuit: &Circuit<N>,
parameters: &[f64],
shots: Option<usize>,
) -> Result<SimulationResult> {
let start_time = std::time::Instant::now();
let simulator = self.get_simulator(N)?;
let mut param_circuit = circuit.clone();
let mut param_idx = 0;
if !parameters.is_empty() {
return Err(MLError::NotSupported(
"Parameterized circuits not yet supported with current GPU API".to_string(),
));
}
return Err(MLError::NotSupported(
"GPU simulation disabled in beta.1 - use CPU backend instead".to_string(),
));
#[allow(unreachable_code)]
let state: Vec<Complex64> = vec![];
let probabilities =
Array1::from(state.iter().map(|amp| amp.norm_sqr()).collect::<Vec<f64>>());
let measurements = if let Some(n_shots) = shots {
probabilities
.as_slice()
.map(|probs| self.sample_measurements(probs, n_shots))
} else {
None
};
let elapsed_ms = start_time.elapsed().as_secs_f64() * 1000.0;
let mut metrics = self.metrics.lock().expect("metrics mutex poisoned");
metrics.total_circuits_executed += 1;
metrics.total_gpu_time_ms += elapsed_ms;
let mut metadata = HashMap::new();
metadata.insert("device_id".to_string(), self.device_id as f64);
metadata.insert("num_qubits".to_string(), N as f64);
metadata.insert("gpu_time_ms".to_string(), elapsed_ms);
metadata.insert(
"total_gpu_circuits".to_string(),
metrics.total_circuits_executed as f64,
);
Ok(SimulationResult {
state: Some(Array1::from(state)),
measurements,
probabilities: Some(probabilities),
metadata,
})
}
fn sample_measurements(&self, _probabilities: &[f64], n_shots: usize) -> Array1<usize> {
let outcomes: Vec<usize> = (0..n_shots).map(|i| i % _probabilities.len()).collect();
Array1::from(outcomes)
}
fn compute_pauli_expectation(
&self,
state: &[Complex64],
pauli_string: &str,
qubits: &[usize],
) -> f64 {
let mut expectation = 0.0;
let num_qubits = (state.len() as f64).log2() as usize;
for (i, amp) in state.iter().enumerate() {
let mut phase = 1.0;
for (q_idx, qubit) in qubits.iter().enumerate() {
let bit = (i >> qubit) & 1;
let pauli = pauli_string.chars().nth(q_idx).unwrap_or('I');
match pauli {
'Z' => {
if bit == 1 {
phase *= -1.0;
}
}
'X' => {
continue;
}
'Y' => {
continue;
}
_ => {}
}
}
expectation += amp.norm_sqr() * phase;
}
expectation
}
fn parameter_shift_gradient_gpu(
&self,
circuit: &DynamicCircuit,
parameters: &[f64],
observable: &Observable,
) -> Result<Array1<f64>> {
let shift = std::f64::consts::PI / 2.0;
let mut gradients = Array1::zeros(parameters.len());
for i in 0..parameters.len() {
let mut params_plus = parameters.to_vec();
params_plus[i] += shift;
let val_plus = self.expectation_value(circuit, ¶ms_plus, observable)?;
let mut params_minus = parameters.to_vec();
params_minus[i] -= shift;
let val_minus = self.expectation_value(circuit, ¶ms_minus, observable)?;
gradients[i] = (val_plus - val_minus) / 2.0;
}
Ok(gradients)
}
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
impl SimulatorBackend for GPUBackend {
fn execute_circuit(
&self,
circuit: &DynamicCircuit,
parameters: &[f64],
shots: Option<usize>,
) -> Result<SimulationResult> {
match circuit {
DynamicCircuit::Circuit1(c) => self.execute_circuit_typed(c, parameters, shots),
DynamicCircuit::Circuit2(c) => self.execute_circuit_typed(c, parameters, shots),
DynamicCircuit::Circuit4(c) => self.execute_circuit_typed(c, parameters, shots),
DynamicCircuit::Circuit8(c) => self.execute_circuit_typed(c, parameters, shots),
DynamicCircuit::Circuit16(c) => self.execute_circuit_typed(c, parameters, shots),
DynamicCircuit::Circuit32(c) => {
if circuit.num_qubits() > self.max_qubits {
Err(MLError::InvalidInput(format!(
"Circuit with {} qubits exceeds GPU limit of {}",
circuit.num_qubits(),
self.max_qubits
)))
} else {
self.execute_circuit_typed(c, parameters, shots)
}
}
DynamicCircuit::Circuit64(c) => Err(MLError::InvalidInput(
"64-qubit circuits not supported on GPU due to memory constraints".to_string(),
)),
}
}
fn expectation_value(
&self,
circuit: &DynamicCircuit,
parameters: &[f64],
observable: &Observable,
) -> Result<f64> {
let result = self.execute_circuit(circuit, parameters, None)?;
let state = result
.state
.ok_or_else(|| MLError::BackendError("No state returned from GPU".to_string()))?;
match observable {
Observable::PauliString(pauli) => {
let state_slice = state.as_slice().ok_or_else(|| {
MLError::BackendError("State vector is not contiguous".to_string())
})?;
Ok(self.compute_pauli_expectation(state_slice, "Z", &[0]))
}
Observable::PauliZ(qubits) => {
let pauli_string = "Z".repeat(qubits.len());
let state_slice = state.as_slice().ok_or_else(|| {
MLError::BackendError("State vector is not contiguous".to_string())
})?;
Ok(self.compute_pauli_expectation(state_slice, &pauli_string, qubits))
}
Observable::Hamiltonian(terms) => {
let state_slice = state.as_slice().ok_or_else(|| {
MLError::BackendError("State vector is not contiguous".to_string())
})?;
let mut total = 0.0;
for (coeff, pauli) in terms {
let exp = self.compute_pauli_expectation(state_slice, "Z", &[0]);
total += coeff * exp;
}
Ok(total)
}
Observable::Matrix(matrix) => {
let mut expectation = Complex64::new(0.0, 0.0);
for i in 0..state.len() {
for j in 0..state.len() {
expectation += state[i].conj() * matrix[[i, j]] * state[j];
}
}
Ok(expectation.re)
}
}
}
fn compute_gradients(
&self,
circuit: &DynamicCircuit,
parameters: &[f64],
observable: &Observable,
gradient_method: GradientMethod,
) -> Result<Array1<f64>> {
match gradient_method {
GradientMethod::ParameterShift => {
self.parameter_shift_gradient_gpu(circuit, parameters, observable)
}
GradientMethod::Adjoint => {
self.parameter_shift_gradient_gpu(circuit, parameters, observable)
}
GradientMethod::FiniteDifference => {
let epsilon = 1e-7; let mut gradients = Array1::zeros(parameters.len());
for i in 0..parameters.len() {
let mut params_plus = parameters.to_vec();
params_plus[i] += epsilon;
let val_plus = self.expectation_value(circuit, ¶ms_plus, observable)?;
let mut params_minus = parameters.to_vec();
params_minus[i] -= epsilon;
let val_minus = self.expectation_value(circuit, ¶ms_minus, observable)?;
gradients[i] = (val_plus - val_minus) / (2.0 * epsilon);
}
Ok(gradients)
}
_ => Err(MLError::NotSupported(
"Gradient method not supported on GPU".to_string(),
)),
}
}
fn capabilities(&self) -> BackendCapabilities {
BackendCapabilities {
max_qubits: self.max_qubits,
noise_simulation: false,
gpu_acceleration: true,
distributed: false,
adjoint_gradients: false, memory_per_qubit: 16, }
}
fn name(&self) -> &str {
"gpu_full"
}
fn max_qubits(&self) -> usize {
self.max_qubits
}
fn supports_noise(&self) -> bool {
false }
}
#[cfg(all(feature = "gpu", not(target_os = "macos")))]
impl GPUMemoryPool {
fn new(max_qubits: usize) -> Self {
Self {
buffers: Mutex::new(HashMap::new()),
max_buffer_size: 1 << max_qubits,
}
}
fn allocate(&self, size: usize) -> Vec<Complex64> {
let mut buffers = self.buffers.lock().expect("buffers mutex poisoned");
if let Some(buffer_list) = buffers.get_mut(&size) {
if let Some(buffer) = buffer_list.pop() {
return buffer;
}
}
vec![Complex64::new(0.0, 0.0); size]
}
fn deallocate(&self, mut buffer: Vec<Complex64>) {
if buffer.len() > self.max_buffer_size {
return; }
for val in &mut buffer {
*val = Complex64::new(0.0, 0.0);
}
let mut buffers = self.buffers.lock().expect("buffers mutex poisoned");
buffers
.entry(buffer.len())
.or_insert_with(Vec::new)
.push(buffer);
}
}
#[cfg(not(all(feature = "gpu", not(target_os = "macos"))))]
#[derive(Debug)]
pub struct GPUBackend {
device_id: usize,
max_qubits: usize,
}
#[cfg(not(all(feature = "gpu", not(target_os = "macos"))))]
impl GPUBackend {
pub fn new(device_id: usize, max_qubits: usize) -> Result<Self> {
Err(MLError::NotSupported(
"GPU backend not available on this platform".to_string(),
))
}
}
#[cfg(not(all(feature = "gpu", not(target_os = "macos"))))]
impl SimulatorBackend for GPUBackend {
fn execute_circuit(
&self,
_circuit: &DynamicCircuit,
_parameters: &[f64],
_shots: Option<usize>,
) -> Result<SimulationResult> {
Err(MLError::NotSupported(
"GPU backend not available on this platform".to_string(),
))
}
fn expectation_value(
&self,
_circuit: &DynamicCircuit,
_parameters: &[f64],
_observable: &Observable,
) -> Result<f64> {
Err(MLError::NotSupported(
"GPU backend not available on this platform".to_string(),
))
}
fn compute_gradients(
&self,
_circuit: &DynamicCircuit,
_parameters: &[f64],
_observable: &Observable,
_gradient_method: GradientMethod,
) -> Result<Array1<f64>> {
Err(MLError::NotSupported(
"GPU backend not available on this platform".to_string(),
))
}
fn capabilities(&self) -> BackendCapabilities {
BackendCapabilities {
max_qubits: 0,
noise_simulation: false,
gpu_acceleration: false,
distributed: false,
adjoint_gradients: false,
memory_per_qubit: 0,
}
}
fn name(&self) -> &str {
"gpu_stub"
}
fn max_qubits(&self) -> usize {
0
}
fn supports_noise(&self) -> bool {
false
}
}