use scirs2_core::ndarray::{Array, ArrayD, Dimension, Ix2};
use scirs2_core::random::{thread_rng, Rng, RngExt};
use std::collections::HashMap;
use thiserror::Error;
use crate::sampler::SampleResult;
use quantrs2_anneal::is_available as anneal_gpu_available;
/// Errors returned by the GPU sampling entry points in this module.
///
/// Each variant wraps a human-readable description that is interpolated into
/// the variant's `Display` message.
#[derive(Error, Debug)]
pub enum GpuError {
/// No usable GPU backend (feature disabled, or device/program setup failed).
#[error("GPU not available: {0}")]
NotAvailable(String),
/// Creating a device buffer or copying data between host and device failed.
#[error("Memory transfer error: {0}")]
MemoryTransfer(String),
/// Building or running a device kernel failed.
#[error("Kernel execution error: {0}")]
KernelExecution(String),
/// A tensor-level operation failed.
#[error("Tensor operation error: {0}")]
TensorOperation(String),
}
/// Convenience alias for results whose error type is [`GpuError`].
pub type GpuResult<T> = Result<T, GpuError>;
/// Reports whether a GPU backend can be used by this build.
///
/// * Without the `gpu_accelerated` feature this always returns `false`.
/// * With `gpu_accelerated` and `scirs`, the answer is delegated to the
///   annealer crate's availability probe.
/// * With `gpu_accelerated` only, the answer is `true` when OpenCL reports at
///   least one platform.
///
/// This is deliberately not a `const fn`: both GPU probes are runtime calls
/// and cannot be evaluated in a const context.
pub fn is_available() -> bool {
    #[cfg(feature = "gpu_accelerated")]
    {
        #[cfg(feature = "scirs")]
        {
            anneal_gpu_available()
        }
        #[cfg(not(feature = "scirs"))]
        {
            // NOTE(review): confirm how `Platform::list` behaves when no OpenCL
            // driver is installed; an empty list is the only case handled here.
            !ocl::Platform::list().is_empty()
        }
    }
    #[cfg(not(feature = "gpu_accelerated"))]
    {
        false
    }
}
/// Draws `shots` random binary states for a QUBO, evaluates their energies on
/// the GPU and returns the samples sorted by ascending energy.
///
/// # Parameters
/// * `matrix` - square QUBO coefficient matrix; its shape must be
///   `(var_map.len(), var_map.len())`.
/// * `var_map` - maps each variable name to its row/column index in `matrix`.
///   Indices that do not appear in the map are left out of the returned
///   assignments.
/// * `shots` - number of random states to evaluate.
/// * `temperature_steps` - currently unused: neither backend runs an annealing
///   schedule; the states are only sampled and scored.
///
/// # Backends
/// * With the `scirs` feature the states are generated, binarised and scored
///   through the `GpuDevice` helpers, and identical assignments are merged
///   into one [`SampleResult`] whose `occurrences` counts the duplicates.
/// * Without `scirs` an OpenCL kernel scores host-generated states. The kernel
///   adds the diagonal entry of every set bit plus the upper-triangular entry
///   of every pair of set bits. Duplicates are *not* merged on this path; each
///   shot yields one result with `occurrences == 1`.
///
/// # Returns
/// An empty vector when `shots == 0` or the variable map is empty; otherwise
/// the samples ordered by energy (`f64::total_cmp`, so NaN energies cannot
/// break the ordering).
///
/// # Errors
/// * [`GpuError::TensorOperation`] if `matrix` does not match the variable map.
/// * [`GpuError::NotAvailable`] if the device or OpenCL program cannot be set up.
/// * [`GpuError::MemoryTransfer`] if a buffer cannot be created or copied.
/// * [`GpuError::KernelExecution`] if a kernel cannot be built or run, or the
///   problem is too large for the kernel's 32-bit indexing.
#[cfg(feature = "gpu_accelerated")]
pub fn gpu_solve_qubo(
    matrix: &Array<f64, Ix2>,
    var_map: &HashMap<String, usize>,
    shots: usize,
    temperature_steps: usize,
) -> GpuResult<Vec<SampleResult>> {
    // Kept for API compatibility; see the parameter documentation.
    let _ = temperature_steps;

    let n_vars = var_map.len();
    // The kernels stride through the matrix by `n_vars`, so a mismatched shape
    // would read the wrong coefficients.
    if matrix.dim() != (n_vars, n_vars) {
        return Err(GpuError::TensorOperation(format!(
            "QUBO matrix has shape {:?} but the variable map defines {} variables",
            matrix.dim(),
            n_vars
        )));
    }
    // Nothing to sample; also avoids creating zero-length device buffers.
    if shots == 0 || n_vars == 0 {
        return Ok(Vec::new());
    }

    let idx_to_var: HashMap<usize, String> = var_map
        .iter()
        .map(|(var, &idx)| (idx, var.clone()))
        .collect();

    #[cfg(feature = "scirs")]
    {
        use crate::scirs_stub::scirs2_core::gpu::{GpuArray, GpuDevice};

        let device = GpuDevice::new(0).map_err(|e| GpuError::NotAvailable(e.to_string()))?;
        let gpu_matrix = GpuArray::from_ndarray(device.clone(), matrix)
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;
        let gpu_states = device
            .random_array::<f32>((shots, n_vars))
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;
        let gpu_binary = device
            .binarize(&gpu_states, 0.5)
            .map_err(|e| GpuError::KernelExecution(e.to_string()))?;
        let gpu_energies = device
            .qubo_energy(&gpu_binary, &gpu_matrix)
            .map_err(|e| GpuError::KernelExecution(e.to_string()))?;
        let binary_states: Array<bool, Ix2> = gpu_binary
            .to_ndarray()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;
        let energies: Array<f64, Ix2> = gpu_energies
            .to_ndarray()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;

        let mut results = Vec::with_capacity(shots);
        for i in 0..shots {
            let state = binary_states.slice(scirs2_core::ndarray::s![i, ..]);
            let energy = energies[[i, 0]];
            let assignments: HashMap<String, bool> = state
                .iter()
                .enumerate()
                .filter_map(|(idx, &value)| {
                    idx_to_var
                        .get(&idx)
                        .map(|var_name| (var_name.clone(), value))
                })
                .collect();
            results.push(SampleResult {
                assignments,
                energy,
                occurrences: 1,
            });
        }
        // Sorting first means that, when several shots collapse onto the same
        // assignment, the lowest-energy one is the representative that is kept.
        results.sort_by(|a, b| a.energy.total_cmp(&b.energy));

        // Merge identical assignments; the sorted (name, value) list is a
        // canonical, hashable key for an assignment map.
        let mut consolidated = HashMap::new();
        for result in results {
            let mut sorted_assignments: Vec<(String, bool)> = result
                .assignments
                .iter()
                .map(|(k, &v)| (k.clone(), v))
                .collect();
            sorted_assignments.sort_by(|a, b| a.0.cmp(&b.0));
            let entry = consolidated
                .entry(sorted_assignments)
                .or_insert_with(|| (result.assignments.clone(), result.energy, 0));
            entry.2 += 1;
        }

        let mut final_results: Vec<SampleResult> = consolidated
            .into_iter()
            .map(|(_, (assignments, energy, occurrences))| SampleResult {
                assignments,
                energy,
                occurrences,
            })
            .collect();
        final_results.sort_by(|a, b| a.energy.total_cmp(&b.energy));
        Ok(final_results)
    }
    #[cfg(not(feature = "scirs"))]
    {
        use ocl::{Buffer, MemFlags, ProQue};

        // The kernel computes every offset with a 32-bit `int`, so both the
        // state array and the matrix must be addressable within `i32::MAX`.
        let index_limit = i32::MAX as usize;
        let matrix_len = n_vars * n_vars;
        let state_len = shots
            .checked_mul(n_vars)
            .filter(|&len| len <= index_limit && matrix_len <= index_limit)
            .ok_or_else(|| {
                GpuError::KernelExecution(format!(
                    "problem too large for 32-bit kernel indexing: {} shots x {} variables",
                    shots, n_vars
                ))
            })?;

        let ocl_pq = ProQue::builder()
            .src(
                r#"
__kernel void qubo_energy(__global const uchar* binary,
__global const double* matrix,
__global double* energies,
const int n_vars) {
int gid = get_global_id(0);
int offset = gid * n_vars;
double energy = 0.0;
// Linear terms
for (int i = 0; i < n_vars; i++) {
if (binary[offset + i]) {
energy += matrix[i * n_vars + i];
}
}
// Quadratic terms
for (int i = 0; i < n_vars; i++) {
if (binary[offset + i]) {
for (int j = i + 1; j < n_vars; j++) {
if (binary[offset + j]) {
energy += matrix[i * n_vars + j];
}
}
}
}
energies[gid] = energy;
}
"#,
            )
            .dims(shots)
            .build()
            .map_err(|e| GpuError::NotAvailable(e.to_string()))?;

        // Row-major copy of the matrix, matching the kernel's `i * n_vars + j`.
        let flat_matrix: Vec<f64> = matrix.iter().copied().collect();

        // One byte per (shot, variable), laid out shot after shot.
        let mut rng = thread_rng();
        let binary_states: Vec<u8> = (0..state_len)
            .map(|_| if rng.random::<bool>() { 1u8 } else { 0u8 })
            .collect();

        let binary_buffer = Buffer::builder()
            .queue(ocl_pq.queue().clone())
            .flags(MemFlags::READ_ONLY)
            .len(state_len)
            .copy_host_slice(&binary_states)
            .build()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;
        let matrix_buffer = Buffer::builder()
            .queue(ocl_pq.queue().clone())
            .flags(MemFlags::READ_ONLY)
            .len(matrix_len)
            .copy_host_slice(&flat_matrix)
            .build()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;
        let energies_buffer = Buffer::builder()
            .queue(ocl_pq.queue().clone())
            .flags(MemFlags::WRITE_ONLY)
            .len(shots)
            .build()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;

        let mut kernel = ocl_pq
            .kernel_builder("qubo_energy")
            .arg(&binary_buffer)
            .arg(&matrix_buffer)
            .arg(&energies_buffer)
            // Lossless: `n_vars <= matrix_len <= i32::MAX` was checked above.
            .arg(n_vars as i32)
            .build()
            .map_err(|e| GpuError::KernelExecution(e.to_string()))?;

        // SAFETY: the global work size is `shots`. Work item `gid` reads
        // `binary[gid * n_vars .. (gid + 1) * n_vars]` (buffer length
        // `shots * n_vars`), reads `matrix[i * n_vars + j]` with
        // `i, j < n_vars` (buffer length `n_vars * n_vars`) and writes
        // `energies[gid]` (buffer length `shots`), so every access is in
        // bounds and none of the 32-bit offsets can overflow.
        unsafe {
            kernel
                .enq()
                .map_err(|e| GpuError::KernelExecution(e.to_string()))?;
        }

        let mut energies = vec![0.0f64; shots];
        energies_buffer
            .read(&mut energies)
            .enq()
            .map_err(|e| GpuError::MemoryTransfer(e.to_string()))?;

        // `n_vars > 0` is guaranteed by the early return above, so chunking
        // cannot panic; each chunk is the state of one shot.
        let mut results: Vec<SampleResult> = binary_states
            .chunks_exact(n_vars)
            .zip(&energies)
            .map(|(state, &energy)| {
                let assignments: HashMap<String, bool> = state
                    .iter()
                    .enumerate()
                    .filter_map(|(idx, &bit)| {
                        idx_to_var
                            .get(&idx)
                            .map(|var_name| (var_name.clone(), bit == 1))
                    })
                    .collect();
                SampleResult {
                    assignments,
                    energy,
                    occurrences: 1,
                }
            })
            .collect();
        results.sort_by(|a, b| a.energy.total_cmp(&b.energy));
        Ok(results)
    }
}
/// Fallback used when the crate is built without the `gpu_accelerated` feature.
///
/// All arguments are ignored and the call always fails.
///
/// # Errors
/// Always returns [`GpuError::NotAvailable`] with a hint on how to enable GPU
/// support.
#[cfg(not(feature = "gpu_accelerated"))]
pub fn gpu_solve_qubo(
    _matrix: &Array<f64, Ix2>,
    _var_map: &HashMap<String, usize>,
    _shots: usize,
    _temperature_steps: usize,
) -> GpuResult<Vec<SampleResult>> {
    let reason =
        String::from("GPU acceleration not enabled. Rebuild with '--features gpu_accelerated'");
    Err(GpuError::NotAvailable(reason))
}
/// Samples low-energy assignments of a higher-order binary optimisation (HOBO)
/// problem with single-bit-flip simulated annealing.
///
/// Despite the `gpu_` prefix, the code in this function runs entirely on the
/// host; no device calls are made here.
///
/// # Parameters
/// * `tensor` - coefficient tensor. An entry at index `(i, j, ...)` contributes
///   `coeff * x[i] * x[j] * ...` to the energy; indices outside the variable
///   range contribute a factor of zero. Entries with magnitude below `1e-14`
///   are ignored.
/// * `var_map` - maps each variable name to its index; its length is the number
///   of binary variables.
/// * `shots` - number of independent annealing runs.
/// * `temperature_steps` - number of sweeps per run (at least one sweep is
///   always performed). The temperature starts at `1.0` and is multiplied by
///   `0.99` after every sweep.
///
/// # Returns
/// One [`SampleResult`] per shot, holding the final state of that run and
/// `occurrences == 1`, sorted by ascending energy with `f64::total_cmp`. Each
/// run is seeded from its shot index, so repeated calls with the same inputs
/// yield the same samples.
///
/// # Errors
/// This implementation does not produce an error.
#[cfg(all(feature = "gpu_accelerated", feature = "scirs"))]
pub fn gpu_solve_hobo(
    tensor: &ArrayD<f64>,
    var_map: &HashMap<String, usize>,
    shots: usize,
    temperature_steps: usize,
) -> GpuResult<Vec<SampleResult>> {
    use scirs2_core::random::{rngs::StdRng, RngExt, SeedableRng};

    /// Coefficients smaller than this in magnitude are treated as zero.
    const COEFF_EPSILON: f64 = 1e-14;
    /// Temperature at the start of every run.
    const INITIAL_TEMPERATURE: f64 = 1.0;
    /// Multiplicative cooling factor applied after each sweep.
    const COOLING_RATE: f64 = 0.99;
    /// Below this temperature only strictly improving moves are accepted.
    const MIN_TEMPERATURE: f64 = 1e-12;

    let n_vars = var_map.len();
    let idx_to_var: HashMap<usize, String> = var_map
        .iter()
        .map(|(var, &idx)| (idx, var.clone()))
        .collect();

    // Extract the non-negligible terms once. The energy is evaluated after
    // every single bit flip, so rescanning the full dense tensor (zeros
    // included) each time is wasted work. The terms keep the tensor's
    // iteration order, so the floating-point sum is unchanged.
    let terms: Vec<(Vec<usize>, f64)> = tensor
        .indexed_iter()
        .filter_map(|(idx, &coeff)| {
            if coeff.abs() < COEFF_EPSILON {
                None
            } else {
                Some((idx.slice().to_vec(), coeff))
            }
        })
        .collect();

    let evaluate_hobo_energy = |x: &[u8]| -> f64 {
        let mut energy = 0.0_f64;
        for (indices, coeff) in &terms {
            // Product of the selected bits; an out-of-range index counts as 0.
            let contrib: f64 = indices
                .iter()
                .map(|&i| x.get(i).map_or(0.0, |&bit| f64::from(bit)))
                .product();
            energy += *coeff * contrib;
        }
        energy
    };

    let num_sweeps = temperature_steps.max(1);
    let mut results = Vec::with_capacity(shots);
    for shot_idx in 0..shots {
        // Deterministic per-shot seed (offset by one so shot 0 does not use seed 0).
        let mut rng = StdRng::seed_from_u64(shot_idx as u64 + 1);
        let mut x: Vec<u8> = (0..n_vars).map(|_| rng.random_range(0..2u8)).collect();
        let mut energy = evaluate_hobo_energy(&x);
        let mut temperature = INITIAL_TEMPERATURE;

        for _sweep in 0..num_sweeps {
            for i in 0..n_vars {
                // Tentatively flip bit `i` and score the candidate state.
                x[i] ^= 1;
                let new_energy = evaluate_hobo_energy(&x);
                let delta = new_energy - energy;
                // Metropolis rule: always take improvements; take a worsening
                // move with probability exp(-delta / T) while T is non-negligible.
                let accept = delta < 0.0
                    || (temperature > MIN_TEMPERATURE
                        && rng.random::<f64>() < (-delta / temperature).exp());
                if accept {
                    energy = new_energy;
                } else {
                    // Rejected: undo the flip.
                    x[i] ^= 1;
                }
            }
            temperature *= COOLING_RATE;
        }

        let assignments: HashMap<String, bool> = x
            .iter()
            .enumerate()
            .filter_map(|(idx, &bit)| idx_to_var.get(&idx).map(|name| (name.clone(), bit != 0)))
            .collect();
        results.push(SampleResult {
            assignments,
            energy,
            occurrences: 1,
        });
    }

    // `total_cmp` is a total order even if an energy is NaN.
    results.sort_by(|a, b| a.energy.total_cmp(&b.energy));
    Ok(results)
}
/// Fallback used unless both the `gpu_accelerated` and `scirs` features are
/// enabled.
///
/// All arguments are ignored and the call always fails.
///
/// # Errors
/// Always returns [`GpuError::NotAvailable`] naming the required features.
#[cfg(not(all(feature = "gpu_accelerated", feature = "scirs")))]
pub fn gpu_solve_hobo(
    _tensor: &ArrayD<f64>,
    _var_map: &HashMap<String, usize>,
    _shots: usize,
    _temperature_steps: usize,
) -> GpuResult<Vec<SampleResult>> {
    let reason = String::from(
        "GPU acceleration for HOBO not available. Requires both 'gpu_accelerated' and 'scirs' features.",
    );
    Err(GpuError::NotAvailable(reason))
}