use scirs2_core::ndarray::{Array, Ix2};
#[cfg(all(feature = "gpu", feature = "dwave"))]
use scirs2_core::random::{rngs::StdRng, thread_rng, Rng, SeedableRng};
use std::collections::HashMap;

#[cfg(all(feature = "gpu", feature = "dwave"))]
use super::super::evaluate_qubo_energy;
use super::super::{SampleResult, Sampler, SamplerError, SamplerResult};

#[cfg(all(feature = "gpu", feature = "dwave"))]
use ocl::{
    self,
    enums::{DeviceInfo, DeviceInfoResult},
    Context, DeviceType, Program,
};
/// GPU-accelerated simulated annealing sampler built on OpenCL.
#[cfg(feature = "gpu")]
pub struct ArminSampler {
    /// Optional seed for reproducible sampling.
    seed: Option<u64>,
    /// Execution mode, either "GPU" or "CPU".
    mode: String,
    /// Device identifier, e.g. "cuda:0" or any string containing "cpu".
    device: String,
    /// Whether to print progress information.
    verbose: bool,
}

#[cfg(feature = "gpu")]
impl ArminSampler {
    /// Creates a sampler with default settings: GPU mode, device "cuda:0",
    /// verbose output.
    #[must_use]
    pub fn new(seed: Option<u64>) -> Self {
        Self {
            seed,
            mode: "GPU".to_string(),
            device: "cuda:0".to_string(),
            verbose: true,
        }
    }

    /// Creates a sampler with explicit execution mode, device string, and
    /// verbosity.
    #[must_use]
    pub fn with_params(seed: Option<u64>, mode: &str, device: &str, verbose: bool) -> Self {
        Self {
            seed,
            mode: mode.to_string(),
            device: device.to_string(),
            verbose,
        }
    }

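    // Usage sketch (hypothetical values; assumes scirs2_core re-exports
    // ndarray's `Array2` alias and that the QUBO tuple format matches
    // `Sampler::run_qubo` below):
    //
    //     use scirs2_core::ndarray::Array2;
    //     use std::collections::HashMap;
    //
    //     let mut var_map = HashMap::new();
    //     var_map.insert("x0".to_string(), 0);
    //     var_map.insert("x1".to_string(), 1);
    //     let qubo = (Array2::<f64>::zeros((2, 2)), var_map);
    //     let sampler = ArminSampler::with_params(Some(42), "GPU", "cuda:0", false);
    //     let samples = sampler.run_qubo(&qubo, 100);
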
    /// Runs simulated annealing on an OpenCL device, returning one binary
    /// solution per shot.
    #[cfg(all(feature = "gpu", feature = "dwave"))]
    fn run_gpu_annealing(
        &self,
        n_vars: usize,
        h_vector: &[f64],
        j_matrix: &[f64],
        num_shots: usize,
    ) -> Result<Vec<Vec<bool>>, ocl::Error> {
        use ocl::flags;
        use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue};

        // Problems beyond 2048 variables exceed the standard kernel's limits,
        // so fall back to the chunked solver.
        if n_vars > 2048 {
            if self.verbose {
                println!(
                    "Problem size too large for standard OpenCL kernel. Using chunked approach."
                );
            }
            return self.run_gpu_annealing_chunked(n_vars, h_vector, j_matrix, num_shots);
        }

        if self.verbose {
            println!("Initializing GPU with {n_vars} variables and {num_shots} shots");
        }

        // Prefer a CPU platform when the device string requests one.
        let platform = if self.device.contains("cpu") {
            Platform::list()
                .into_iter()
                .find(|p| p.name().unwrap_or_default().to_lowercase().contains("cpu"))
                .unwrap_or_else(Platform::default)
        } else {
            Platform::default()
        };

        if self.verbose {
            println!("Using platform: {}", platform.name().unwrap_or_default());
        }

        // Pick the first device of the requested type, falling back to the
        // platform's first device.
        let device = if self.device.contains("cpu") {
            Device::list_all(platform)
                .unwrap_or_default()
                .into_iter()
                .find(|d| {
                    matches!(d.info(DeviceInfo::Type).ok(), Some(DeviceInfoResult::Type(dt)) if dt == DeviceType::default().cpu())
                })
                .map_or_else(|| Device::first(platform), Ok)?
        } else {
            Device::list_all(platform)
                .unwrap_or_default()
                .into_iter()
                .find(|d| {
                    matches!(d.info(DeviceInfo::Type).ok(), Some(DeviceInfoResult::Type(dt)) if dt == DeviceType::default().gpu())
                })
                .map_or_else(|| Device::first(platform), Ok)?
        };

        if self.verbose {
            println!("Using device: {}", device.name().unwrap_or_default());
        }

        let context = Context::builder()
            .platform(platform)
            .devices(device)
            .build()?;
        let queue = Queue::new(&context, device, None)?;

        let src = super::kernels::SIMULATED_ANNEALING_KERNEL;

        // Build the program against the same context the queue uses.
        let program = Program::builder()
            .devices(device)
            .src(src)
            .build(&context)?;

        // Device buffers: fields, couplings, and one output row per shot.
        let h_buffer = Buffer::<f32>::builder()
            .queue(queue.clone())
            .flags(flags::MEM_READ_ONLY)
            .len(n_vars)
            .build()?;

        let j_buffer = Buffer::<f32>::builder()
            .queue(queue.clone())
            .flags(flags::MEM_READ_ONLY)
            .len(n_vars * n_vars)
            .build()?;

        let solutions_buffer = Buffer::<u8>::builder()
            .queue(queue)
            .flags(flags::MEM_WRITE_ONLY)
            .len(num_shots * n_vars)
            .build()?;

        // The kernel works in single precision; downcast the host data.
        let h_vec_f32: Vec<f32> = h_vector.iter().map(|&x| x as f32).collect();
        let j_mat_f32: Vec<f32> = j_matrix.iter().map(|&x| x as f32).collect();

        h_buffer.write(&h_vec_f32).enq()?;
        j_buffer.write(&j_mat_f32).enq()?;

        // Annealing schedule: fixed temperature range, sweep count scaled
        // with the problem size.
        let init_temp = 10.0f32;
        let final_temp = 0.1f32;
        let sweeps = if n_vars < 100 {
            1000
        } else if n_vars < 500 {
            2000
        } else {
            5000
        };

        if self.verbose {
            println!("Running {sweeps} sweeps with temperature range [{final_temp}, {init_temp}]");
        }

        let seed_val = self.seed.unwrap_or_else(|| thread_rng().gen());

        // One work item per shot; each runs an independent annealing chain.
        let kernel = Kernel::builder()
            .program(&program)
            .name("simulated_annealing")
            .global_work_size(num_shots)
            .arg(n_vars as i32)
            .arg(&h_buffer)
            .arg(&j_buffer)
            .arg(&solutions_buffer)
            .arg(num_shots as i32)
            .arg(init_temp)
            .arg(final_temp)
            .arg(sweeps)
            .arg(seed_val)
            .build()?;

        unsafe {
            kernel.enq()?;
        }

        let mut solutions_data = vec![0u8; num_shots * n_vars];
        solutions_buffer.read(&mut solutions_data).enq()?;

        // Unpack the flat byte buffer into one boolean vector per shot.
        let mut results = Vec::with_capacity(num_shots);
        for i in 0..num_shots {
            let mut solution = Vec::with_capacity(n_vars);
            for j in 0..n_vars {
                solution.push(solutions_data[i * n_vars + j] != 0);
            }
            results.push(solution);
        }

        Ok(results)
    }

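    // Energy model sketch (an assumption inferred from how `run_qubo` splits
    // the matrix and from the arguments to `evaluate_qubo_energy`; the
    // kernels are expected to minimize the standard QUBO objective over
    // binary x):
    //
    //     E(x) = sum_i h[i] * x[i] + sum_{i != j} J[i][j] * x[i] * x[j]
    //
    // With the variables outside a chunk frozen, each cross term contributes
    // a constant to a chunk variable's field, which is why the chunked solver
    // below can fold them into `adjusted_h`.
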
    /// Solves large problems by optimizing blocks of at most `MAX_CHUNK_SIZE`
    /// variables at a time.
    #[cfg(all(feature = "gpu", feature = "dwave"))]
    fn run_gpu_annealing_chunked(
        &self,
        n_vars: usize,
        h_vector: &[f64],
        j_matrix: &[f64],
        num_shots: usize,
    ) -> Result<Vec<Vec<bool>>, ocl::Error> {
        if self.verbose {
            println!("Using chunked approach for large problem: {n_vars} variables");
        }

        const MAX_CHUNK_SIZE: usize = 1024;

        let num_chunks = n_vars.div_ceil(MAX_CHUNK_SIZE);

        if self.verbose {
            println!(
                "Processing in {num_chunks} chunks of at most {MAX_CHUNK_SIZE} variables each"
            );
        }

        let mut rng = if let Some(seed) = self.seed {
            StdRng::seed_from_u64(seed)
        } else {
            let seed: u64 = thread_rng().gen();
            StdRng::seed_from_u64(seed)
        };

        // Start every shot from a uniformly random assignment.
        let mut solutions: Vec<Vec<bool>> = Vec::with_capacity(num_shots);
        for _ in 0..num_shots {
            let mut solution = Vec::with_capacity(n_vars);
            for _ in 0..n_vars {
                solution.push(rng.gen_bool(0.5));
            }
            solutions.push(solution);
        }

        let mut energies = vec![0.0; num_shots];

        for (i, solution) in solutions.iter().enumerate() {
            energies[i] = evaluate_qubo_energy(solution, h_vector, j_matrix, n_vars);
        }

        for chunk_idx in 0..num_chunks {
            let start_var = chunk_idx * MAX_CHUNK_SIZE;
            let end_var = std::cmp::min((chunk_idx + 1) * MAX_CHUNK_SIZE, n_vars);
            let chunk_size = end_var - start_var;

            if self.verbose {
                println!(
                    "Processing chunk {}/{}: variables {}..{}",
                    chunk_idx + 1,
                    num_chunks,
                    start_var,
                    end_var - 1
                );
            }

            // Extract the sub-problem restricted to this chunk.
            let mut chunk_h = Vec::with_capacity(chunk_size);
            let mut chunk_j = Vec::with_capacity(chunk_size * chunk_size);

            for i in start_var..end_var {
                chunk_h.push(h_vector[i]);
            }

            for i in start_var..end_var {
                for j in start_var..end_var {
                    chunk_j.push(j_matrix[i * n_vars + j]);
                }
            }

            for sol_idx in 0..solutions.len() {
                // Fold couplings to fixed out-of-chunk variables into the
                // local fields.
                let mut adjusted_h = chunk_h.clone();

                for i in start_var..end_var {
                    for j in 0..n_vars {
                        if (j < start_var || j >= end_var) && solutions[sol_idx][j] {
                            adjusted_h[i - start_var] += j_matrix[i * n_vars + j];
                        }
                    }
                }

                let mut chunk_solution = Vec::with_capacity(chunk_size);
                for i in start_var..end_var {
                    chunk_solution.push(solutions[sol_idx][i]);
                }

                let optimized_chunk = self.optimize_chunk(
                    &chunk_solution,
                    &adjusted_h,
                    &chunk_j,
                    chunk_size,
                    self.seed.map(|s| s + sol_idx as u64),
                )?;

                // Write the optimized block back into the full solution.
                for (i, &val) in optimized_chunk.iter().enumerate() {
                    solutions[sol_idx][start_var + i] = val;
                }

                energies[sol_idx] =
                    evaluate_qubo_energy(&solutions[sol_idx], h_vector, j_matrix, n_vars);
            }
        }

        // Return the solutions ordered from lowest to highest energy.
        let mut solution_pairs: Vec<(Vec<bool>, f64)> =
            solutions.into_iter().zip(energies).collect();
        solution_pairs
            .sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        Ok(solution_pairs.into_iter().map(|(sol, _)| sol).collect())
    }

    /// Optimizes a single chunk on the default OpenCL device, starting from
    /// the given state.
    #[cfg(all(feature = "gpu", feature = "dwave"))]
    fn optimize_chunk(
        &self,
        initial_state: &[bool],
        h_vector: &[f64],
        j_matrix: &[f64],
        n_vars: usize,
        seed: Option<u64>,
    ) -> Result<Vec<bool>, ocl::Error> {
        use ocl::flags;
        use ocl::{Buffer, Context, Device, Kernel, Platform, Program, Queue};

        let platform = Platform::default();
        let device = Device::first(platform)?;
        let context = Context::builder()
            .platform(platform)
            .devices(device)
            .build()?;
        let queue = Queue::new(&context, device, None)?;

        let src = super::kernels::OPTIMIZE_CHUNK_KERNEL;

        let program = Program::builder()
            .devices(device)
            .src(src)
            .build(&context)?;

        let h_buffer = Buffer::<f32>::builder()
            .queue(queue.clone())
            .flags(flags::MEM_READ_ONLY)
            .len(n_vars)
            .build()?;

        let j_buffer = Buffer::<f32>::builder()
            .queue(queue.clone())
            .flags(flags::MEM_READ_ONLY)
            .len(n_vars * n_vars)
            .build()?;

        let initial_buffer = Buffer::<u8>::builder()
            .queue(queue.clone())
            .flags(flags::MEM_READ_ONLY)
            .len(n_vars)
            .build()?;

        let result_buffer = Buffer::<u8>::builder()
            .queue(queue)
            .flags(flags::MEM_WRITE_ONLY)
            .len(n_vars)
            .build()?;

        let h_vec_f32: Vec<f32> = h_vector.iter().map(|&x| x as f32).collect();
        let j_mat_f32: Vec<f32> = j_matrix.iter().map(|&x| x as f32).collect();
        let initial_u8: Vec<u8> = initial_state.iter().map(|&b| u8::from(b)).collect();

        h_buffer.write(&h_vec_f32).enq()?;
        j_buffer.write(&j_mat_f32).enq()?;
        initial_buffer.write(&initial_u8).enq()?;

        // A single work item anneals the whole chunk. The trailing literals
        // are the annealing parameters, in order: sweep count, initial
        // temperature, final temperature, RNG seed.
        let kernel = Kernel::builder()
            .program(&program)
            .name("optimize_chunk")
            .global_work_size(1)
            .arg(n_vars as i32)
            .arg(&h_buffer)
            .arg(&j_buffer)
            .arg(&initial_buffer)
            .arg(&result_buffer)
            .arg(5000i32)
            .arg(5.0f32)
            .arg(0.01f32)
            .arg(seed.unwrap_or_else(|| thread_rng().gen()))
            .build()?;

        unsafe {
            kernel.enq()?;
        }

        let mut result_u8 = vec![0u8; n_vars];
        result_buffer.read(&mut result_u8).enq()?;

        let result: Vec<bool> = result_u8.iter().map(|&b| b != 0).collect();

        Ok(result)
    }

    #[cfg(not(all(feature = "gpu", feature = "dwave")))]
    fn run_gpu_annealing(
        &self,
        _n_vars: usize,
        _h_vector: &[f64],
        _j_matrix: &[f64],
        _num_shots: usize,
    ) -> Result<Vec<Vec<bool>>, String> {
        Err("GPU support not enabled. Rebuild with '--features gpu,dwave'".to_string())
    }

    #[cfg(not(all(feature = "gpu", feature = "dwave")))]
    fn run_gpu_annealing_chunked(
        &self,
        _n_vars: usize,
        _h_vector: &[f64],
        _j_matrix: &[f64],
        _num_shots: usize,
    ) -> Result<Vec<Vec<bool>>, String> {
        Err("GPU support not enabled. Rebuild with '--features gpu,dwave'".to_string())
    }

    #[cfg(not(all(feature = "gpu", feature = "dwave")))]
    fn optimize_chunk(
        &self,
        _initial_state: &[bool],
        _h_vector: &[f64],
        _j_matrix: &[f64],
        _n_vars: usize,
        _seed: Option<u64>,
    ) -> Result<Vec<bool>, String> {
        Err("GPU support not enabled. Rebuild with '--features gpu,dwave'".to_string())
    }
}

#[cfg(feature = "gpu")]
impl Sampler for ArminSampler {
    fn run_qubo(
        &self,
        qubo: &(Array<f64, Ix2>, HashMap<String, usize>),
        shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        let (matrix, var_map) = qubo;

        let n_vars = var_map.len();

        // Reverse mapping from matrix index back to variable name.
        let idx_to_var: HashMap<usize, String> = var_map
            .iter()
            .map(|(var, &idx)| (idx, var.clone()))
            .collect();

        let is_gpu = self.mode.to_uppercase() == "GPU";
        let device_info = if is_gpu {
            format!("Using GPU device: {}", self.device)
        } else {
            "Using CPU acceleration".to_string()
        };

        if self.verbose {
            println!("{device_info}");
            println!("Problem size: {n_vars} variables");
        }

        // Split the QUBO matrix into linear terms (diagonal) and couplings
        // (off-diagonal), flattened for the kernels.
        let mut h_vector = Vec::with_capacity(n_vars);
        let mut j_matrix = Vec::with_capacity(n_vars * n_vars);

        for i in 0..n_vars {
            h_vector.push(matrix[[i, i]]);
        }

        for i in 0..n_vars {
            for j in 0..n_vars {
                if i == j {
                    // Diagonal terms already live in h_vector.
                    j_matrix.push(0.0);
                } else {
                    j_matrix.push(matrix[[i, j]]);
                }
            }
        }

        #[cfg(all(feature = "gpu", feature = "dwave"))]
        let ocl_result = self.run_gpu_annealing(n_vars, &h_vector, &j_matrix, shots);

        #[cfg(not(all(feature = "gpu", feature = "dwave")))]
        let ocl_result: Result<Vec<Vec<bool>>, SamplerError> = Err(SamplerError::GpuError(
            "GPU support not enabled".to_string(),
        ));

        #[cfg(all(feature = "gpu", feature = "dwave"))]
        match ocl_result {
            Ok(binary_solutions) => {
                // Deduplicate identical solutions, tracking each one's energy
                // and how often it occurred.
                let mut solution_counts: HashMap<Vec<bool>, (f64, usize)> = HashMap::new();

                for solution in binary_solutions {
                    let energy = evaluate_qubo_energy(&solution, &h_vector, &j_matrix, n_vars);

                    let entry = solution_counts.entry(solution).or_insert((energy, 0));
                    entry.1 += 1;
                }

                let mut results: Vec<SampleResult> = solution_counts
                    .into_iter()
                    .map(|(bin_solution, (energy, count))| {
                        let assignments: HashMap<String, bool> = bin_solution
                            .iter()
                            .enumerate()
                            .filter_map(|(idx, &value)| {
                                idx_to_var
                                    .get(&idx)
                                    .map(|var_name| (var_name.clone(), value))
                            })
                            .collect();

                        SampleResult {
                            assignments,
                            energy,
                            occurrences: count,
                        }
                    })
                    .collect();

                // Sort ascending by energy and keep at most `shots` results.
                results.sort_by(|a, b| {
                    a.energy
                        .partial_cmp(&b.energy)
                        .unwrap_or(std::cmp::Ordering::Equal)
                });

                if results.len() > shots {
                    results.truncate(shots);
                }

                Ok(results)
            }
            Err(e) => Err(SamplerError::GpuError(e.to_string())),
        }

        #[cfg(not(all(feature = "gpu", feature = "dwave")))]
        match ocl_result {
            Ok(_) => unreachable!("GPU support not enabled"),
            Err(e) => Err(e),
        }
    }

    fn run_hobo(
        &self,
        hobo: &(
            Array<f64, scirs2_core::ndarray::IxDyn>,
            HashMap<String, usize>,
        ),
        shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        // A two-dimensional HOBO tensor is just a QUBO; delegate after
        // converting the dimensionality.
        if hobo.0.ndim() == 2 {
            let matrix = hobo
                .0
                .clone()
                .into_dimensionality::<scirs2_core::ndarray::Ix2>()
                .map_err(|e| {
                    SamplerError::InvalidParameter(format!(
                        "Failed to convert HOBO to QUBO dimensionality: {e}"
                    ))
                })?;
            let qubo = (matrix, hobo.1.clone());
            return self.run_qubo(&qubo, shots);
        }

        Err(SamplerError::InvalidParameter(
            "GPU acceleration for HOBO problems not yet implemented. Consider quadratization to QUBO format.".to_string()
        ))
    }
}

/// Fallback sampler used when the "gpu" feature is disabled; every run
/// reports a `GpuError`.
#[cfg(not(feature = "gpu"))]
pub struct ArminSampler {
    _seed: Option<u64>,
}

#[cfg(not(feature = "gpu"))]
impl ArminSampler {
    #[must_use]
    pub const fn new(_seed: Option<u64>) -> Self {
        Self { _seed }
    }

    #[must_use]
    pub const fn with_params(
        _seed: Option<u64>,
        _mode: &str,
        _device: &str,
        _verbose: bool,
    ) -> Self {
        Self { _seed }
    }
}

#[cfg(not(feature = "gpu"))]
impl Sampler for ArminSampler {
    fn run_qubo(
        &self,
        _qubo: &(Array<f64, Ix2>, HashMap<String, usize>),
        _shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        Err(SamplerError::GpuError(
            "GPU support not enabled. Rebuild with '--features gpu'".to_string(),
        ))
    }

    fn run_hobo(
        &self,
        _hobo: &(
            Array<f64, scirs2_core::ndarray::IxDyn>,
            HashMap<String, usize>,
        ),
        _shots: usize,
    ) -> SamplerResult<Vec<SampleResult>> {
        Err(SamplerError::GpuError(
            "GPU support not enabled. Rebuild with '--features gpu'".to_string(),
        ))
    }
}
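
// A minimal smoke-test sketch for the fallback path, assuming a standard
// Cargo test setup; with the "gpu" feature off, `run_qubo` should surface a
// `SamplerError::GpuError` rather than panic.
#[cfg(all(test, not(feature = "gpu")))]
mod tests {
    use super::*;

    #[test]
    fn run_qubo_without_gpu_feature_reports_error() {
        let sampler = ArminSampler::new(Some(1));
        // Hypothetical 2-variable QUBO; the zero matrix is enough to exercise
        // the error path.
        let matrix = Array::<f64, Ix2>::zeros((2, 2));
        let mut var_map = HashMap::new();
        var_map.insert("x0".to_string(), 0);
        var_map.insert("x1".to_string(), 1);
        assert!(sampler.run_qubo(&(matrix, var_map), 10).is_err());
    }
}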