use scirs2_core::ndarray::{Array, Ix2};
#[cfg(all(feature = "gpu", feature = "dwave"))]
use scirs2_core::random::{rngs::StdRng, thread_rng, Rng, RngExt, SeedableRng};
use std::collections::HashMap;
#[cfg(all(feature = "gpu", feature = "dwave"))]
use super::super::evaluate_qubo_energy;
use super::super::{SampleResult, Sampler, SamplerError, SamplerResult};
#[cfg(all(feature = "gpu", feature = "dwave"))]
use ocl::{
self,
enums::{DeviceInfo, DeviceInfoResult},
Context, DeviceType, Program,
};
/// Sampler that solves QUBO problems with simulated annealing on an OpenCL device.
///
/// This definition is only compiled when the `gpu` feature is enabled.
#[cfg(feature = "gpu")]
pub struct ArminSampler {
/// Optional RNG seed; a random seed is drawn at run time when this is `None`.
seed: Option<u64>,
/// Execution mode label; compared case-insensitively against `"GPU"` when logging.
mode: String,
/// Device selector; a value containing `"cpu"` requests an OpenCL CPU device.
device: String,
/// When `true`, progress information is printed to stdout.
verbose: bool,
}
#[cfg(feature = "gpu")]
impl ArminSampler {
#[must_use]
pub fn new(seed: Option<u64>) -> Self {
Self {
seed,
mode: "GPU".to_string(),
device: "cuda:0".to_string(),
verbose: true,
}
}
/// Creates a sampler with an explicit mode label, device selector and verbosity.
///
/// Both string arguments are copied into the sampler.
#[must_use]
pub fn with_params(seed: Option<u64>, mode: &str, device: &str, verbose: bool) -> Self {
    let mode = String::from(mode);
    let device = String::from(device);
    Self {
        seed,
        mode,
        device,
        verbose,
    }
}
/// Runs the `simulated_annealing` OpenCL kernel, one work item per shot.
///
/// * `n_vars` - number of binary variables.
/// * `h_vector` - linear coefficients, `n_vars` entries.
/// * `j_matrix` - coupling coefficients, row-major `n_vars * n_vars` entries.
/// * `num_shots` - number of independent annealing runs.
///
/// Problems with more than 2048 variables are forwarded to
/// [`Self::run_gpu_annealing_chunked`]. Returns one assignment per shot.
///
/// # Errors
///
/// Returns the underlying `ocl::Error` if device discovery, program
/// compilation, buffer transfer or kernel execution fails.
#[cfg(all(feature = "gpu", feature = "dwave"))]
fn run_gpu_annealing(
    &self,
    n_vars: usize,
    h_vector: &[f64],
    j_matrix: &[f64],
    num_shots: usize,
) -> Result<Vec<Vec<bool>>, ocl::Error> {
    use ocl::flags;
    use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue};

    // OpenCL rejects zero-sized buffers, so answer degenerate requests
    // directly: no shots -> no results, no variables -> empty assignments.
    if n_vars == 0 || num_shots == 0 {
        return Ok(vec![Vec::new(); num_shots]);
    }

    if n_vars > 2048 {
        if self.verbose {
            println!(
                "Problem size too large for standard OpenCL kernel. Using chunked approach."
            );
        }
        return self.run_gpu_annealing_chunked(n_vars, h_vector, j_matrix, num_shots);
    }
    if self.verbose {
        println!("Initializing GPU with {n_vars} variables and {num_shots} shots");
    }

    let prefer_cpu = self.device.contains("cpu");
    let platform = if prefer_cpu {
        Platform::list()
            .into_iter()
            .find(|p| p.name().unwrap_or_default().to_lowercase().contains("cpu"))
            .unwrap_or_else(Platform::default)
    } else {
        Platform::default()
    };
    if self.verbose {
        println!("Using platform: {}", platform.name().unwrap_or_default());
    }

    // NOTE(review): confirm that `DeviceType::default().cpu()` / `.gpu()` equal
    // the bare device-type flag; if `default()` carries other bits the equality
    // below never matches and the platform's first device is used instead.
    let wanted_type = if prefer_cpu {
        DeviceType::default().cpu()
    } else {
        DeviceType::default().gpu()
    };
    let device = Device::list_all(platform)
        .unwrap_or_default()
        .into_iter()
        .find(|d| {
            matches!(d.info(DeviceInfo::Type).ok(), Some(DeviceInfoResult::Type(dt)) if dt == wanted_type)
        })
        .map_or_else(|| Device::first(platform), Ok)?;
    if self.verbose {
        println!("Using device: {}", device.name().unwrap_or_default());
    }

    // A single context must own the queue, the program and every buffer;
    // OpenCL objects from different contexts cannot be combined in one kernel.
    let context = Context::builder()
        .platform(platform)
        .devices(device)
        .build()?;
    let queue = Queue::new(&context, device, None)?;
    let program = Program::builder()
        .devices(device)
        .src(super::kernels::SIMULATED_ANNEALING_KERNEL)
        .build(&context)?;

    let h_buffer = Buffer::<f32>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_READ_ONLY)
        .len(n_vars)
        .build()?;
    let j_buffer = Buffer::<f32>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_READ_ONLY)
        .len(n_vars * n_vars)
        .build()?;
    let solutions_buffer = Buffer::<u8>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_WRITE_ONLY)
        .len(num_shots * n_vars)
        .build()?;

    // The kernel works in single precision.
    let h_vec_f32: Vec<f32> = h_vector.iter().map(|&x| x as f32).collect();
    let j_mat_f32: Vec<f32> = j_matrix.iter().map(|&x| x as f32).collect();
    h_buffer.write(&h_vec_f32).enq()?;
    j_buffer.write(&j_mat_f32).enq()?;

    let init_temp = 10.0f32;
    let final_temp = 0.1f32;
    // Larger problems get more sweeps.
    let sweeps: i32 = if n_vars < 100 {
        1000
    } else if n_vars < 500 {
        2000
    } else {
        5000
    };
    if self.verbose {
        println!("Running {sweeps} sweeps with temperature range [{final_temp}, {init_temp}]");
    }
    let seed_val = self.seed.unwrap_or_else(|| thread_rng().random());

    // The kernel needs a default queue, otherwise `enq()` has nothing to
    // submit the work to.
    let kernel = Kernel::builder()
        .program(&program)
        .name("simulated_annealing")
        .queue(queue.clone())
        .global_work_size(num_shots)
        .arg(n_vars as i32) // n_vars <= 2048 here, so the cast cannot truncate
        .arg(&h_buffer)
        .arg(&j_buffer)
        .arg(&solutions_buffer)
        .arg(num_shots as i32)
        .arg(init_temp)
        .arg(final_temp)
        .arg(sweeps)
        .arg(seed_val)
        .build()?;
    // SAFETY: all buffer arguments were allocated above with the sizes passed
    // to the kernel (n_vars, n_vars^2, num_shots * n_vars); the kernel source
    // is trusted to stay within those bounds.
    unsafe {
        kernel.enq()?;
    }

    let mut solutions_data = vec![0u8; num_shots * n_vars];
    solutions_buffer.read(&mut solutions_data).enq()?;

    // The device writes shots back to back, `n_vars` bytes each.
    let results: Vec<Vec<bool>> = solutions_data
        .chunks_exact(n_vars)
        .map(|shot| shot.iter().map(|&bit| bit != 0).collect())
        .collect();
    Ok(results)
}
/// Anneals a large problem by optimizing blocks of at most 1024 variables at a time.
///
/// Every shot starts from a random assignment. For each chunk, the variables
/// outside the chunk are held fixed and their couplings to the chunk are
/// folded into the chunk's linear terms; the chunk is then optimized with
/// [`Self::optimize_chunk`] and written back into the shot.
///
/// Returns the final assignments sorted by ascending QUBO energy.
///
/// # Errors
///
/// Propagates the first `ocl::Error` reported by [`Self::optimize_chunk`].
#[cfg(all(feature = "gpu", feature = "dwave"))]
fn run_gpu_annealing_chunked(
    &self,
    n_vars: usize,
    h_vector: &[f64],
    j_matrix: &[f64],
    num_shots: usize,
) -> Result<Vec<Vec<bool>>, ocl::Error> {
    if self.verbose {
        println!("Using chunked approach for large problem: {n_vars} variables");
    }
    const MAX_CHUNK_SIZE: usize = 1024;
    let num_chunks = n_vars.div_ceil(MAX_CHUNK_SIZE);
    if self.verbose {
        println!(
            "Processing in {num_chunks} chunks of at most {MAX_CHUNK_SIZE} variables each"
        );
    }

    let mut rng = StdRng::seed_from_u64(self.seed.unwrap_or_else(|| thread_rng().random()));
    // Random starting point for every shot (drawn shot by shot, variable by variable).
    let mut solutions: Vec<Vec<bool>> = (0..num_shots)
        .map(|_| (0..n_vars).map(|_| rng.random_bool(0.5)).collect())
        .collect();

    for chunk_idx in 0..num_chunks {
        let start_var = chunk_idx * MAX_CHUNK_SIZE;
        let end_var = ((chunk_idx + 1) * MAX_CHUNK_SIZE).min(n_vars);
        let chunk_size = end_var - start_var;
        if self.verbose {
            println!(
                "Processing chunk {}/{}: variables {}..{}",
                chunk_idx + 1,
                num_chunks,
                start_var,
                end_var - 1
            );
        }

        // Linear terms and the diagonal sub-block of couplings for this chunk.
        let chunk_h = &h_vector[start_var..end_var];
        let chunk_j: Vec<f64> = (start_var..end_var)
            .flat_map(|i| {
                j_matrix[i * n_vars + start_var..i * n_vars + end_var]
                    .iter()
                    .copied()
            })
            .collect();

        for (sol_idx, solution) in solutions.iter_mut().enumerate() {
            // Fold couplings to active variables outside the chunk into the
            // chunk's linear terms.
            let mut adjusted_h = chunk_h.to_vec();
            for (field, i) in adjusted_h.iter_mut().zip(start_var..end_var) {
                let row = &j_matrix[i * n_vars..(i + 1) * n_vars];
                for (j, &coupling) in row.iter().enumerate() {
                    if (j < start_var || j >= end_var) && solution[j] {
                        *field += coupling;
                    }
                }
            }

            // NOTE(review): the per-shot seed does not depend on `chunk_idx`,
            // so a seeded run reuses the same seed for every chunk of a shot.
            // `wrapping_add` keeps seeds near `u64::MAX` from overflowing.
            let optimized_chunk = self.optimize_chunk(
                &solution[start_var..end_var],
                &adjusted_h,
                &chunk_j,
                chunk_size,
                self.seed.map(|s| s.wrapping_add(sol_idx as u64)),
            )?;
            for (slot, &val) in solution[start_var..end_var]
                .iter_mut()
                .zip(&optimized_chunk)
            {
                *slot = val;
            }
        }
    }

    // Only the final energies are needed for ranking, so evaluate each shot
    // once after all chunks have been processed.
    let mut solution_pairs: Vec<(Vec<bool>, f64)> = solutions
        .into_iter()
        .map(|sol| {
            let energy = evaluate_qubo_energy(&sol, h_vector, j_matrix, n_vars);
            (sol, energy)
        })
        .collect();
    solution_pairs
        .sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    Ok(solution_pairs.into_iter().map(|(sol, _)| sol).collect())
}
/// Optimizes one chunk of variables with the `optimize_chunk` OpenCL kernel.
///
/// * `initial_state` - starting assignment of the chunk's variables.
/// * `h_vector` - linear coefficients of the chunk, `n_vars` entries.
/// * `j_matrix` - couplings within the chunk, row-major `n_vars * n_vars` entries.
/// * `n_vars` - number of variables in the chunk.
/// * `seed` - optional kernel seed; a random one is drawn when `None`.
///
/// Runs on the first device of the default OpenCL platform and returns the
/// optimized assignment (`n_vars` entries).
///
/// # Errors
///
/// Returns the underlying `ocl::Error` if any OpenCL call fails.
#[cfg(all(feature = "gpu", feature = "dwave"))]
fn optimize_chunk(
    &self,
    initial_state: &[bool],
    h_vector: &[f64],
    j_matrix: &[f64],
    n_vars: usize,
    seed: Option<u64>,
) -> Result<Vec<bool>, ocl::Error> {
    use ocl::flags;
    use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue};

    // Nothing to optimize, and OpenCL rejects zero-sized buffers.
    if n_vars == 0 {
        return Ok(Vec::new());
    }

    let platform = Platform::default();
    let device = Device::first(platform)?;
    let context = Context::builder()
        .platform(platform)
        .devices(device)
        .build()?;
    let queue = Queue::new(&context, device, None)?;
    let program = Program::builder()
        .devices(device)
        .src(super::kernels::OPTIMIZE_CHUNK_KERNEL)
        .build(&context)?;

    let h_buffer = Buffer::<f32>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_READ_ONLY)
        .len(n_vars)
        .build()?;
    let j_buffer = Buffer::<f32>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_READ_ONLY)
        .len(n_vars * n_vars)
        .build()?;
    let initial_buffer = Buffer::<u8>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_READ_ONLY)
        .len(n_vars)
        .build()?;
    let result_buffer = Buffer::<u8>::builder()
        .queue(queue.clone())
        .flags(flags::MEM_WRITE_ONLY)
        .len(n_vars)
        .build()?;

    // The kernel works in single precision and on byte-encoded booleans.
    let h_vec_f32: Vec<f32> = h_vector.iter().map(|&x| x as f32).collect();
    let j_mat_f32: Vec<f32> = j_matrix.iter().map(|&x| x as f32).collect();
    let initial_u8: Vec<u8> = initial_state.iter().map(|&b| u8::from(b)).collect();
    h_buffer.write(&h_vec_f32).enq()?;
    j_buffer.write(&j_mat_f32).enq()?;
    initial_buffer.write(&initial_u8).enq()?;

    // The kernel needs a default queue, otherwise `enq()` has nothing to
    // submit the work to.
    // NOTE(review): the three literals are presumably sweep count, initial
    // temperature and final temperature - confirm against OPTIMIZE_CHUNK_KERNEL.
    let kernel = Kernel::builder()
        .program(&program)
        .name("optimize_chunk")
        .queue(queue.clone())
        .global_work_size(1)
        .arg(n_vars as i32)
        .arg(&h_buffer)
        .arg(&j_buffer)
        .arg(&initial_buffer)
        .arg(&result_buffer)
        .arg(5000i32)
        .arg(5.0f32)
        .arg(0.01f32)
        .arg(seed.unwrap_or_else(|| thread_rng().random()))
        .build()?;
    // SAFETY: all buffer arguments were allocated above with the sizes passed
    // to the kernel (n_vars and n_vars^2); the kernel source is trusted to
    // stay within those bounds.
    unsafe {
        kernel.enq()?;
    }

    let mut result_u8 = vec![0u8; n_vars];
    result_buffer.read(&mut result_u8).enq()?;
    Ok(result_u8.iter().map(|&b| b != 0).collect())
}
/// Fallback compiled when the `gpu` and `dwave` features are not both enabled.
///
/// Ignores every argument and always returns an error message.
#[cfg(not(all(feature = "gpu", feature = "dwave")))]
fn run_gpu_annealing(
    &self,
    _n_vars: usize,
    _h_vector: &[f64],
    _j_matrix: &[f64],
    _num_shots: usize,
) -> Result<Vec<Vec<bool>>, String> {
    let reason = String::from("GPU support not enabled. Rebuild with '--features gpu,dwave'");
    Err(reason)
}
/// Fallback compiled when the `gpu` and `dwave` features are not both enabled.
///
/// Ignores every argument and always returns an error message.
#[cfg(not(all(feature = "gpu", feature = "dwave")))]
fn run_gpu_annealing_chunked(
    &self,
    _n_vars: usize,
    _h_vector: &[f64],
    _j_matrix: &[f64],
    _num_shots: usize,
) -> Result<Vec<Vec<bool>>, String> {
    let reason = String::from("GPU support not enabled. Rebuild with '--features gpu,dwave'");
    Err(reason)
}
/// Fallback compiled when the `gpu` and `dwave` features are not both enabled.
///
/// Ignores every argument and always returns an error message.
#[cfg(not(all(feature = "gpu", feature = "dwave")))]
fn optimize_chunk(
    &self,
    _initial_state: &[bool],
    _h_vector: &[f64],
    _j_matrix: &[f64],
    _n_vars: usize,
    _seed: Option<u64>,
) -> Result<Vec<bool>, String> {
    let reason = String::from("GPU support not enabled. Rebuild with '--features gpu,dwave'");
    Err(reason)
}
}
#[cfg(feature = "gpu")]
impl Sampler for ArminSampler {
/// Samples a QUBO problem on the GPU and returns the distinct solutions found.
///
/// `qubo` pairs the coefficient matrix with a map from variable name to
/// matrix index. The diagonal is used as the linear terms and the
/// off-diagonal entries as couplings. Identical assignments are merged and
/// their occurrences counted; the result is sorted by ascending energy.
///
/// # Errors
///
/// * `SamplerError::InvalidParameter` if the matrix has fewer rows or columns
///   than the variable map has entries.
/// * `SamplerError::GpuError` if the GPU run fails, or if the crate was built
///   without both the `gpu` and `dwave` features.
fn run_qubo(
    &self,
    qubo: &(Array<f64, Ix2>, HashMap<String, usize>),
    shots: usize,
) -> SamplerResult<Vec<SampleResult>> {
    let (matrix, var_map) = qubo;
    let n_vars = var_map.len();

    // The loops below index the matrix up to `n_vars` in both dimensions;
    // report a mismatch as an error instead of panicking on the index.
    if matrix.nrows() < n_vars || matrix.ncols() < n_vars {
        return Err(SamplerError::InvalidParameter(format!(
            "QUBO matrix is {}x{} but the variable map has {n_vars} entries",
            matrix.nrows(),
            matrix.ncols()
        )));
    }

    let idx_to_var: HashMap<usize, String> = var_map
        .iter()
        .map(|(var, &idx)| (idx, var.clone()))
        .collect();

    if self.verbose {
        let device_info = if self.mode.to_uppercase() == "GPU" {
            format!("Using GPU device: {}", self.device)
        } else {
            "Using CPU acceleration".to_string()
        };
        println!("{device_info}");
        println!("Problem size: {n_vars} variables");
    }

    // Linear terms live on the diagonal.
    let h_vector: Vec<f64> = (0..n_vars).map(|i| matrix[[i, i]]).collect();
    // Couplings are the off-diagonal entries, flattened row-major with a
    // zeroed diagonal so linear terms are not counted twice.
    let mut j_matrix: Vec<f64> = Vec::with_capacity(n_vars * n_vars);
    for i in 0..n_vars {
        j_matrix.extend((0..n_vars).map(|j| if i == j { 0.0 } else { matrix[[i, j]] }));
    }

    #[cfg(all(feature = "gpu", feature = "dwave"))]
    let ocl_result = self.run_gpu_annealing(n_vars, &h_vector, &j_matrix, shots);
    #[cfg(not(all(feature = "gpu", feature = "dwave")))]
    let ocl_result: Result<Vec<Vec<i32>>, SamplerError> = Err(SamplerError::GpuError(
        "GPU support not enabled".to_string(),
    ));
    #[cfg(all(feature = "gpu", feature = "dwave"))]
    match ocl_result {
        Ok(binary_solutions) => {
            // Merge identical assignments, remembering energy and multiplicity.
            let mut solution_counts: HashMap<Vec<bool>, (f64, usize)> = HashMap::new();
            for solution in binary_solutions {
                let energy = evaluate_qubo_energy(&solution, &h_vector, &j_matrix, n_vars);
                let entry = solution_counts.entry(solution).or_insert((energy, 0));
                entry.1 += 1;
            }
            let mut results: Vec<SampleResult> = solution_counts
                .into_iter()
                .map(|(bin_solution, (energy, count))| {
                    // Indices without a name in the variable map are skipped.
                    let assignments: HashMap<String, bool> = bin_solution
                        .iter()
                        .enumerate()
                        .filter_map(|(idx, &value)| {
                            idx_to_var
                                .get(&idx)
                                .map(|var_name| (var_name.clone(), value))
                        })
                        .collect();
                    SampleResult {
                        assignments,
                        energy,
                        occurrences: count,
                    }
                })
                .collect();
            results.sort_by(|a, b| {
                a.energy
                    .partial_cmp(&b.energy)
                    .unwrap_or(std::cmp::Ordering::Equal)
            });
            // `truncate` is a no-op when there are at most `shots` results.
            results.truncate(shots);
            Ok(results)
        }
        Err(e) => Err(SamplerError::GpuError(e.to_string())),
    }
    #[cfg(not(all(feature = "gpu", feature = "dwave")))]
    match ocl_result {
        Ok(_) => unreachable!("GPU support not enabled"),
        Err(e) => Err(e),
    }
}
/// Samples a higher-order problem, which is only supported when the tensor is
/// two-dimensional and can therefore be handled as a QUBO.
///
/// # Errors
///
/// Returns `SamplerError::InvalidParameter` when the tensor is not
/// two-dimensional or cannot be converted to a matrix; otherwise forwards the
/// result of `run_qubo`.
fn run_hobo(
    &self,
    hobo: &(
        Array<f64, scirs2_core::ndarray::IxDyn>,
        HashMap<String, usize>,
    ),
    shots: usize,
) -> SamplerResult<Vec<SampleResult>> {
    let (tensor, var_map) = hobo;

    // Anything that is not a plain matrix is rejected up front.
    if tensor.ndim() != 2 {
        return Err(SamplerError::InvalidParameter(
            "GPU acceleration for HOBO problems not yet implemented. Consider quadratization to QUBO format.".to_string()
        ));
    }

    let matrix = match tensor
        .clone()
        .into_dimensionality::<scirs2_core::ndarray::Ix2>()
    {
        Ok(converted) => converted,
        Err(e) => {
            return Err(SamplerError::InvalidParameter(format!(
                "Failed to convert HOBO to QUBO dimensionality: {e}"
            )));
        }
    };
    self.run_qubo(&(matrix, var_map.clone()), shots)
}
}
/// Placeholder sampler compiled when the `gpu` feature is disabled.
///
/// It keeps the same constructors as the GPU-enabled type, but every sampling
/// call returns an error.
#[cfg(not(feature = "gpu"))]
pub struct ArminSampler {
/// Seed passed to the constructor; the sampling methods in this build do not read it.
_seed: Option<u64>,
}
/// Constructors for the placeholder sampler built without the `gpu` feature.
#[cfg(not(feature = "gpu"))]
impl ArminSampler {
    /// Creates the placeholder sampler, storing only the seed.
    #[must_use]
    pub const fn new(_seed: Option<u64>) -> Self {
        Self { _seed }
    }

    /// Creates the placeholder sampler; the mode, device and verbosity
    /// arguments are accepted for signature compatibility and ignored.
    #[must_use]
    pub const fn with_params(
        _seed: Option<u64>,
        _mode: &str,
        _device: &str,
        _verbose: bool,
    ) -> Self {
        Self::new(_seed)
    }
}
/// `Sampler` implementation for builds without the `gpu` feature: both entry
/// points ignore their arguments and report that GPU support is missing.
#[cfg(not(feature = "gpu"))]
impl Sampler for ArminSampler {
    /// Always fails with `SamplerError::GpuError`.
    fn run_qubo(
        &self,
        _qubo: &(Array<f64, Ix2>, HashMap<String, usize>),
        _shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        let reason = String::from("GPU support not enabled. Rebuild with '--features gpu'");
        Err(SamplerError::GpuError(reason))
    }

    /// Always fails with `SamplerError::GpuError`.
    fn run_hobo(
        &self,
        _hobo: &(
            Array<f64, scirs2_core::ndarray::IxDyn>,
            HashMap<String, usize>,
        ),
        _shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        let reason = String::from("GPU support not enabled. Rebuild with '--features gpu'");
        Err(SamplerError::GpuError(reason))
    }
}