//! GPU backend abstractions for quantum simulation.
//!
//! Defines the [`GpuBuffer`], [`GpuKernel`], and [`GpuBackend`] traits shared
//! by the CPU, CUDA, Metal, and Vulkan implementations, plus
//! [`GpuBackendFactory`] for selecting the best backend available at runtime.

use crate::{
    error::{QuantRS2Error, QuantRS2Result},
    gate::GateOp,
    qubit::QubitId,
};
use scirs2_core::ndarray::{Array1, Array2};
use scirs2_core::Complex64;
use std::sync::Arc;

pub mod cpu_backend;
pub use cpu_backend::CpuBackend;
#[cfg(feature = "cuda")]
pub mod cuda_backend;
#[cfg(feature = "metal")]
pub mod metal_backend;
#[cfg(feature = "metal")]
pub mod metal_backend_scirs2_ready;
#[cfg(feature = "vulkan")]
pub mod vulkan_backend;

pub mod scirs2_adapter;
pub use crate::gpu_stubs::SciRS2GpuConfig;

pub use scirs2_adapter::{
    get_gpu_system_info, is_gpu_available, SciRS2BufferAdapter, SciRS2GpuBackend, SciRS2GpuFactory,
    SciRS2GpuMetrics, SciRS2KernelAdapter,
};

pub mod adaptive_simd;
pub mod large_scale_simulation;
pub mod specialized_kernels;

#[cfg(test)]
mod metal_backend_tests;

pub use adaptive_simd::{
    apply_batch_gates_adaptive, apply_single_qubit_adaptive, apply_two_qubit_adaptive,
    get_adaptive_performance_report, initialize_adaptive_simd, AdaptiveSimdDispatcher, CpuFeatures,
    SimdVariant,
};
pub use large_scale_simulation::{
    LargeScaleGateType, LargeScaleObservable, LargeScalePerformanceStats, LargeScaleSimAccelerator,
    LargeScaleSimConfig, LargeScaleStateVectorSim, LargeScaleTensorContractor, SimulationTaskType,
    TensorDecompositionType,
};
pub use specialized_kernels::{
    FusionType, OptimizationConfig, PerformanceReport, PostQuantumCompressionType,
    SpecializedGpuKernels,
};

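/// A device-resident buffer of complex amplitudes.
///
/// Implementations wrap backend-specific memory (CUDA, Metal, Vulkan, or host
/// memory for the CPU fallback) behind a common upload/download interface.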
pub trait GpuBuffer: Send + Sync {
    /// Returns the size of this buffer.
    fn size(&self) -> usize;

    /// Uploads host data into the device buffer.
    fn upload(&mut self, data: &[Complex64]) -> QuantRS2Result<()>;

    /// Downloads the device buffer into the provided host slice.
    fn download(&self, data: &mut [Complex64]) -> QuantRS2Result<()>;

    /// Waits for pending device operations on this buffer to complete.
    fn sync(&self) -> QuantRS2Result<()>;

    /// Allows downcasting to a concrete buffer type.
    fn as_any(&self) -> &dyn std::any::Any;

    /// Allows mutable downcasting to a concrete buffer type.
    fn as_any_mut(&mut self) -> &mut dyn std::any::Any;
}

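/// Specialized GPU kernels for advanced operations: holonomic gates,
/// post-quantum hash gates, quantum ML attention, fused gate sequences, and
/// tensor contraction.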
pub trait SpecializedGpuKernel: Send + Sync {
    /// Applies a holonomic gate given its holonomy matrix and target qubits.
    fn apply_holonomic_gate(
        &self,
        state: &mut dyn GpuBuffer,
        holonomy_matrix: &[Complex64],
        target_qubits: &[QubitId],
    ) -> QuantRS2Result<()>;

    /// Applies a post-quantum hash gate with the given compression type.
    fn apply_post_quantum_hash_gate(
        &self,
        state: &mut dyn GpuBuffer,
        hash_circuit: &[Complex64],
        compression_type: PostQuantumCompressionType,
    ) -> QuantRS2Result<()>;

    /// Applies a quantum ML attention operation with the given query, key,
    /// and value parameters and number of attention heads.
    fn apply_quantum_ml_attention(
        &self,
        state: &mut dyn GpuBuffer,
        query_params: &[Complex64],
        key_params: &[Complex64],
        value_params: &[Complex64],
        num_heads: usize,
    ) -> QuantRS2Result<()>;

    /// Applies a sequence of gates as one fused operation.
    fn apply_fused_gate_sequence(
        &self,
        state: &mut dyn GpuBuffer,
        gates: &[Box<dyn GateOp>],
    ) -> QuantRS2Result<()>;

    /// Contracts tensor data over the given indices and bond dimension.
    fn apply_tensor_contraction(
        &self,
        tensor_data: &mut dyn GpuBuffer,
        contraction_indices: &[usize],
        bond_dimension: usize,
    ) -> QuantRS2Result<()>;
}

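/// Core GPU kernel interface: gate application, measurement, and expectation
/// values on a device-resident state.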
pub trait GpuKernel: Send + Sync {
    /// Applies a single-qubit gate given its 2x2 matrix as a flat array.
    fn apply_single_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 4],
        qubit: QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()>;

    /// Applies a two-qubit gate given its 4x4 matrix as a flat array.
    fn apply_two_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &[Complex64; 16],
        control: QubitId,
        target: QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<()>;

    /// Applies an arbitrary multi-qubit gate given its dense matrix.
    fn apply_multi_qubit_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate_matrix: &Array2<Complex64>,
        qubits: &[QubitId],
        n_qubits: usize,
    ) -> QuantRS2Result<()>;

    /// Measures a qubit, returning the outcome and its probability.
    fn measure_qubit(
        &self,
        state: &dyn GpuBuffer,
        qubit: QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<(bool, f64)>;

    /// Computes the expectation value of an observable on the given qubits.
    fn expectation_value(
        &self,
        state: &dyn GpuBuffer,
        observable: &Array2<Complex64>,
        qubits: &[QubitId],
        n_qubits: usize,
    ) -> QuantRS2Result<f64>;
}

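/// Extension of [`GpuBackend`] with optional specialized kernels.
///
/// The default method implementations delegate to the backend's
/// [`SpecializedGpuKernel`] when one is available; otherwise they return
/// `UnsupportedOperation` (or, for gate fusion, fall back to applying the
/// gates one at a time).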
pub trait EnhancedGpuBackend: GpuBackend {
    /// Returns this backend's specialized kernel implementation, if any.
    fn specialized_kernel(&self) -> Option<&dyn SpecializedGpuKernel>;

    /// Applies a holonomic gate via the specialized kernel.
    fn apply_holonomic_gate(
        &self,
        state: &mut dyn GpuBuffer,
        holonomy_matrix: &[Complex64],
        target_qubits: &[QubitId],
    ) -> QuantRS2Result<()> {
        if let Some(kernel) = self.specialized_kernel() {
            kernel.apply_holonomic_gate(state, holonomy_matrix, target_qubits)
        } else {
            Err(QuantRS2Error::UnsupportedOperation(
                "Holonomic gates not supported by this backend".to_string(),
            ))
        }
    }

    /// Applies a post-quantum hash gate via the specialized kernel.
    fn apply_post_quantum_crypto(
        &self,
        state: &mut dyn GpuBuffer,
        hash_circuit: &[Complex64],
        compression_type: PostQuantumCompressionType,
    ) -> QuantRS2Result<()> {
        if let Some(kernel) = self.specialized_kernel() {
            kernel.apply_post_quantum_hash_gate(state, hash_circuit, compression_type)
        } else {
            Err(QuantRS2Error::UnsupportedOperation(
                "Post-quantum crypto gates not supported by this backend".to_string(),
            ))
        }
    }

    /// Applies quantum ML attention via the specialized kernel.
    fn apply_quantum_ml_attention(
        &self,
        state: &mut dyn GpuBuffer,
        query_params: &[Complex64],
        key_params: &[Complex64],
        value_params: &[Complex64],
        num_heads: usize,
    ) -> QuantRS2Result<()> {
        if let Some(kernel) = self.specialized_kernel() {
            kernel.apply_quantum_ml_attention(
                state,
                query_params,
                key_params,
                value_params,
                num_heads,
            )
        } else {
            Err(QuantRS2Error::UnsupportedOperation(
                "Quantum ML attention not supported by this backend".to_string(),
            ))
        }
    }

    /// Applies a gate sequence, fused when the backend supports fusion.
    fn apply_fused_gates(
        &self,
        state: &mut dyn GpuBuffer,
        gates: &[Box<dyn GateOp>],
    ) -> QuantRS2Result<()> {
        if let Some(kernel) = self.specialized_kernel() {
            kernel.apply_fused_gate_sequence(state, gates)
        } else {
            // No fusion support: apply each gate individually through the
            // generic path.
            for gate in gates {
                let qubits = gate.qubits();
                self.apply_gate(state, gate.as_ref(), &qubits, qubits.len())?;
            }
            Ok(())
        }
    }

    /// Returns this backend's optimization configuration.
    fn optimization_config(&self) -> OptimizationConfig {
        OptimizationConfig::default()
    }

    /// Returns performance statistics; the default implementation reports
    /// empty, zeroed metrics.
    fn performance_stats(&self) -> PerformanceReport {
        PerformanceReport {
            average_kernel_times: std::collections::HashMap::new(),
            cache_hit_rate: 0.0,
            tensor_core_utilization: 0.0,
            memory_bandwidth_utilization: 0.0,
        }
    }
}

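/// Common interface implemented by every GPU backend (and the CPU fallback).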
pub trait GpuBackend: Send + Sync {
    /// Reports whether this backend can run on the current system.
    fn is_available() -> bool
    where
        Self: Sized;

    /// Returns the backend's name (e.g. "CPU").
    fn name(&self) -> &str;

    /// Returns a human-readable description of the underlying device.
    fn device_info(&self) -> String;

    /// Allocates a state-vector buffer for `n_qubits` qubits.
    fn allocate_state_vector(&self, n_qubits: usize) -> QuantRS2Result<Box<dyn GpuBuffer>>;

    /// Allocates a density-matrix buffer for `n_qubits` qubits.
    fn allocate_density_matrix(&self, n_qubits: usize) -> QuantRS2Result<Box<dyn GpuBuffer>>;

    /// Returns the kernel implementation used to execute operations.
    fn kernel(&self) -> &dyn GpuKernel;

    /// Applies a gate, dispatching on the number of target qubits.
    fn apply_gate(
        &self,
        state: &mut dyn GpuBuffer,
        gate: &dyn GateOp,
        qubits: &[QubitId],
        n_qubits: usize,
    ) -> QuantRS2Result<()> {
        match qubits.len() {
            1 => {
                // Single-qubit fast path: copy the 2x2 matrix into a fixed array.
                let matrix = gate.matrix()?;
                let gate_array: [Complex64; 4] = [matrix[0], matrix[1], matrix[2], matrix[3]];
                self.kernel()
                    .apply_single_qubit_gate(state, &gate_array, qubits[0], n_qubits)
            }
            2 => {
                // Two-qubit fast path: copy the 4x4 matrix into a fixed array.
                let matrix = gate.matrix()?;
                let mut gate_array = [Complex64::new(0.0, 0.0); 16];
                for (i, &val) in matrix.iter().enumerate() {
                    gate_array[i] = val;
                }
                self.kernel().apply_two_qubit_gate(
                    state,
                    &gate_array,
                    qubits[0],
                    qubits[1],
                    n_qubits,
                )
            }
            _ => {
                // General case: build the dense 2^k x 2^k matrix for k target qubits.
                let matrix_vec = gate.matrix()?;
                let size = (1 << qubits.len(), 1 << qubits.len());
                let matrix = Array2::from_shape_vec(size, matrix_vec)?;
                self.kernel()
                    .apply_multi_qubit_gate(state, &matrix, qubits, n_qubits)
            }
        }
    }

    /// Measures a single qubit and returns the outcome.
    fn measure(
        &self,
        state: &mut dyn GpuBuffer,
        qubit: QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<bool> {
        let (outcome, _prob) = self.kernel().measure_qubit(state, qubit, n_qubits)?;
        Ok(outcome)
    }

    /// Returns the probability reported by the kernel's measurement routine
    /// for the given qubit.
    fn get_probability(
        &self,
        state: &dyn GpuBuffer,
        qubit: QubitId,
        n_qubits: usize,
    ) -> QuantRS2Result<f64> {
        let (_outcome, prob) = self.kernel().measure_qubit(state, qubit, n_qubits)?;
        Ok(prob)
    }
}

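/// A quantum state vector stored in a backend-managed device buffer.
///
/// A minimal usage sketch (the crate name `quantrs2_core` is assumed here):
///
/// ```ignore
/// use quantrs2_core::gpu::{GpuBackendFactory, GpuStateVector};
/// use quantrs2_core::{gate::single::Hadamard, qubit::QubitId};
///
/// let backend = GpuBackendFactory::create_best_available()?;
/// let mut state = GpuStateVector::new(backend, 1)?;
/// state.initialize_zero_state()?;
/// state.apply_gate(&Hadamard { target: QubitId(0) }, &[QubitId(0)])?;
/// let probs = state.get_probabilities()?; // approximately [0.5, 0.5]
/// ```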
pub struct GpuStateVector {
    /// Backend used to execute operations on this state.
    backend: Arc<dyn GpuBackend>,
    /// Device buffer holding the state amplitudes.
    buffer: Box<dyn GpuBuffer>,
    /// Number of qubits represented by the state.
    n_qubits: usize,
}

impl GpuStateVector {
    /// Allocates a state vector for `n_qubits` qubits on the given backend.
    pub fn new(backend: Arc<dyn GpuBackend>, n_qubits: usize) -> QuantRS2Result<Self> {
        let buffer = backend.allocate_state_vector(n_qubits)?;
        Ok(Self {
            backend,
            buffer,
            n_qubits,
        })
    }

    /// Initializes the state to |0...0>.
    pub fn initialize_zero_state(&mut self) -> QuantRS2Result<()> {
        let size = 1 << self.n_qubits;
        let mut data = vec![Complex64::new(0.0, 0.0); size];
        data[0] = Complex64::new(1.0, 0.0);
        self.buffer.upload(&data)
    }

    /// Applies a gate to the given qubits.
    pub fn apply_gate(&mut self, gate: &dyn GateOp, qubits: &[QubitId]) -> QuantRS2Result<()> {
        self.backend
            .apply_gate(self.buffer.as_mut(), gate, qubits, self.n_qubits)
    }

    /// Measures a single qubit and returns the outcome.
    pub fn measure(&mut self, qubit: QubitId) -> QuantRS2Result<bool> {
        self.backend
            .measure(self.buffer.as_mut(), qubit, self.n_qubits)
    }

    /// Downloads the state vector from the device into a host array.
    pub fn to_array(&self) -> QuantRS2Result<Array1<Complex64>> {
        let size = 1 << self.n_qubits;
        let mut data = vec![Complex64::new(0.0, 0.0); size];
        self.buffer.download(&mut data)?;
        Ok(Array1::from_vec(data))
    }

    /// Returns |amplitude|^2 for each computational basis state.
    pub fn get_probabilities(&self) -> QuantRS2Result<Vec<f64>> {
        let state = self.to_array()?;
        Ok(state.iter().map(|c| c.norm_sqr()).collect())
    }
}

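/// Factory for constructing [`GpuBackend`] instances.
///
/// A minimal selection sketch (the crate name `quantrs2_core` is assumed
/// here):
///
/// ```ignore
/// use quantrs2_core::gpu::GpuBackendFactory;
///
/// // Probes CUDA, Metal, and Vulkan in order; falls back to the CPU backend.
/// let backend = GpuBackendFactory::create_best_available()?;
/// println!("{}: {}", backend.name(), backend.device_info());
/// ```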
pub struct GpuBackendFactory;

impl GpuBackendFactory {
    /// Creates the most capable backend available, preferring CUDA, then
    /// Metal, then Vulkan, and finally falling back to the CPU backend.
    pub fn create_best_available() -> QuantRS2Result<Arc<dyn GpuBackend>> {
        #[cfg(feature = "cuda")]
        if cuda_backend::CudaBackend::is_available() {
            return Ok(Arc::new(cuda_backend::CudaBackend::new()?));
        }

        #[cfg(feature = "metal")]
        if metal_backend::MetalBackend::is_available() {
            return Ok(Arc::new(metal_backend::MetalBackend::new()?));
        }

        #[cfg(feature = "vulkan")]
        if vulkan_backend::VulkanBackend::is_available() {
            return Ok(Arc::new(vulkan_backend::VulkanBackend::new()?));
        }

        Ok(Arc::new(cpu_backend::CpuBackend::new()))
    }

    /// Creates a backend by name: "cuda", "metal", "vulkan", or "cpu".
    pub fn create_backend(backend_type: &str) -> QuantRS2Result<Arc<dyn GpuBackend>> {
        match backend_type.to_lowercase().as_str() {
            #[cfg(feature = "cuda")]
            "cuda" => Ok(Arc::new(cuda_backend::CudaBackend::new()?)),

            #[cfg(feature = "metal")]
            "metal" => Ok(Arc::new(metal_backend::MetalBackend::new()?)),

            #[cfg(feature = "vulkan")]
            "vulkan" => Ok(Arc::new(vulkan_backend::VulkanBackend::new()?)),

            "cpu" => Ok(Arc::new(cpu_backend::CpuBackend::new())),

            _ => Err(QuantRS2Error::InvalidInput(format!(
                "Unknown backend type: {}",
                backend_type
            ))),
        }
    }

    /// Lists the names of the backends available on this system.
    pub fn available_backends() -> Vec<&'static str> {
        #[allow(unused_mut)]
        let mut backends = vec!["cpu"];

        #[cfg(feature = "cuda")]
        if cuda_backend::CudaBackend::is_available() {
            backends.push("cuda");
        }

        #[cfg(feature = "metal")]
        if metal_backend::MetalBackend::is_available() {
            backends.push("metal");
        }

        #[cfg(feature = "vulkan")]
        if vulkan_backend::VulkanBackend::is_available() {
            backends.push("vulkan");
        }

        backends
    }
}

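/// Configuration options for GPU execution.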
#[derive(Debug, Clone, Default)]
pub struct GpuConfig {
    /// Requested backend name, if any.
    pub backend: Option<String>,
    /// Optional cap on device memory usage.
    pub max_memory: Option<usize>,
    /// Optional thread count for CPU execution.
    pub num_threads: Option<usize>,
    /// Enables profiling instrumentation.
    pub enable_profiling: bool,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::gate::single::Hadamard;

    #[test]
    fn test_gpu_backend_factory() {
        let backends = GpuBackendFactory::available_backends();
        assert!(backends.contains(&"cpu"));

        let backend = GpuBackendFactory::create_backend("cpu").unwrap();
        assert_eq!(backend.name(), "CPU");
    }

    #[test]
    fn test_gpu_state_vector() {
        let backend = GpuBackendFactory::create_best_available().unwrap();
        let mut state = GpuStateVector::new(backend, 2).unwrap();

        state.initialize_zero_state().unwrap();

        let h_gate = Hadamard { target: QubitId(0) };
        state.apply_gate(&h_gate, &[QubitId(0)]).unwrap();

        let probs = state.get_probabilities().unwrap();
        assert_eq!(probs.len(), 4);

        // H on qubit 0 of |00> yields (|00> + |01>)/sqrt(2), so the first two
        // basis states each carry probability 1/2 and the rest are zero.
        assert!((probs[0] - 0.5).abs() < 1e-10);
        assert!((probs[1] - 0.5).abs() < 1e-10);
        assert!((probs[2] - 0.0).abs() < 1e-10);
        assert!((probs[3] - 0.0).abs() < 1e-10);
    }
}