use scirs2_core::ndarray::Array2;
use scirs2_core::Complex64;
use scirs2_core::parallel_ops::*;
use std::alloc::{GlobalAlloc, Layout, System};
use std::collections::{HashMap, VecDeque};
use std::ptr::NonNull;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};

use crate::error::{Result, SimulatorError};
use crate::scirs2_integration::SciRS2Backend;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
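/// Memory layout strategy used for allocating and traversing the state vector.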
pub enum MemoryLayout {
    /// Plain contiguous `Vec` allocation.
    Contiguous,
    /// Contiguous allocation padded to a whole number of cache lines.
    CacheAligned,
    /// Data arranged in fixed-size blocks (see `block_size`).
    Blocked,
    /// Interleaved layout (currently allocated the same as `Contiguous`).
    Interleaved,
    /// Cache-hierarchy-aware layout (currently allocated the same as `Contiguous`).
    Hierarchical,
    /// Start cache-aligned and adapt at runtime via `adapt_memory_layout`.
    Adaptive,
}

#[derive(Debug, Clone)]
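/// Configuration for the memory bandwidth optimizer and state-vector layout.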
pub struct MemoryOptimizationConfig {
    pub layout: MemoryLayout,
    pub cache_line_size: usize,
    pub l1_cache_size: usize,
    pub l2_cache_size: usize,
    pub l3_cache_size: usize,
    pub block_size: usize,
    pub enable_prefetching: bool,
    pub prefetch_distance: usize,
    pub enable_numa_optimization: bool,
    pub memory_pool_size: usize,
    pub enable_bandwidth_monitoring: bool,
    pub adaptation_threshold: f64,
}

impl Default for MemoryOptimizationConfig {
    fn default() -> Self {
        Self {
            layout: MemoryLayout::Adaptive,
            cache_line_size: 64,
            l1_cache_size: 32 * 1024,
            l2_cache_size: 256 * 1024,
            l3_cache_size: 8 * 1024 * 1024,
            block_size: 4096,
            enable_prefetching: true,
            prefetch_distance: 4,
            enable_numa_optimization: true,
            memory_pool_size: 1024 * 1024 * 1024,
            enable_bandwidth_monitoring: true,
            adaptation_threshold: 0.1,
        }
    }
}

#[derive(Debug, Clone)]
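/// Tracks observed memory access behaviour so the layout can adapt at runtime.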
pub struct MemoryAccessPattern {
    pub access_frequency: HashMap<usize, u64>,
    pub sequential_accesses: VecDeque<(usize, usize)>,
    pub random_accesses: VecDeque<usize>,
    pub cache_misses: u64,
    pub total_accesses: u64,
    pub last_access_time: Instant,
}

impl Default for MemoryAccessPattern {
    fn default() -> Self {
        Self {
            access_frequency: HashMap::new(),
            sequential_accesses: VecDeque::new(),
            random_accesses: VecDeque::new(),
            cache_misses: 0,
            total_accesses: 0,
            last_access_time: Instant::now(),
        }
    }
}

#[derive(Debug, Clone)]
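/// Rolling statistics on achieved memory bandwidth and access latency.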
pub struct BandwidthMonitor {
    pub bandwidth_samples: VecDeque<(Instant, f64)>,
    pub current_utilization: f64,
    pub peak_bandwidth: f64,
    pub average_bandwidth: f64,
    pub latency_samples: VecDeque<Duration>,
}

impl Default for BandwidthMonitor {
    fn default() -> Self {
        Self {
            bandwidth_samples: VecDeque::new(),
            current_utilization: 0.0,
            peak_bandwidth: 0.0,
            average_bandwidth: 0.0,
            latency_samples: VecDeque::new(),
        }
    }
}

#[derive(Debug)]
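/// A simple pool of fixed-size, 64-byte-aligned memory blocks backed by the
/// system allocator.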
pub struct MemoryPool {
    blocks: Mutex<Vec<(*mut u8, usize)>>,
    block_size: usize,
    max_blocks: usize,
    allocated_count: Mutex<usize>,
}

impl MemoryPool {
    pub fn new(block_size: usize, max_blocks: usize) -> Result<Self> {
        Ok(Self {
            blocks: Mutex::new(Vec::new()),
            block_size,
            max_blocks,
            allocated_count: Mutex::new(0),
        })
    }

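    /// Hands out a block from the pool, or allocates a new 64-byte-aligned
    /// block from the system allocator when the pool is empty.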
    pub fn allocate(&self) -> Result<NonNull<u8>> {
        let mut blocks = self.blocks.lock().unwrap();

        if let Some((ptr, _)) = blocks.pop() {
            Ok(unsafe { NonNull::new_unchecked(ptr) })
        } else {
            let layout = Layout::from_size_align(self.block_size, 64)
                .map_err(|e| SimulatorError::MemoryAllocationFailed(e.to_string()))?;

            let ptr = unsafe { System.alloc(layout) };
            if ptr.is_null() {
                return Err(SimulatorError::MemoryAllocationFailed(
                    "Failed to allocate memory block".to_string(),
                ));
            }

            let mut count = self.allocated_count.lock().unwrap();
            *count += 1;

            Ok(unsafe { NonNull::new_unchecked(ptr) })
        }
    }

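    /// Returns a block to the pool, or frees it back to the system allocator
    /// once the pool already holds `max_blocks` entries.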
    pub fn deallocate(&self, ptr: NonNull<u8>) -> Result<()> {
        let mut blocks = self.blocks.lock().unwrap();

        if blocks.len() < self.max_blocks {
            blocks.push((ptr.as_ptr(), self.block_size));
        } else {
            let layout = Layout::from_size_align(self.block_size, 64)
                .map_err(|e| SimulatorError::MemoryAllocationFailed(e.to_string()))?;
            unsafe { System.dealloc(ptr.as_ptr(), layout) };

            let mut count = self.allocated_count.lock().unwrap();
            *count -= 1;
        }

        Ok(())
    }
}

// SAFETY: all access to the stored raw pointers goes through the internal Mutex.
unsafe impl Send for MemoryPool {}
unsafe impl Sync for MemoryPool {}

#[derive(Debug)]
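/// State vector whose storage layout and access pattern are tuned for memory
/// bandwidth, with optional runtime adaptation.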
pub struct OptimizedStateVector {
    data: Vec<Complex64>,
    num_qubits: usize,
    layout: MemoryLayout,
    block_size: usize,
    access_pattern: Arc<RwLock<MemoryAccessPattern>>,
    bandwidth_monitor: Arc<RwLock<BandwidthMonitor>>,
    memory_pool: Arc<MemoryPool>,
    config: MemoryOptimizationConfig,
}

impl OptimizedStateVector {
    pub fn new(num_qubits: usize, config: MemoryOptimizationConfig) -> Result<Self> {
        // 2^n amplitudes for an n-qubit register.
        let size = 1 << num_qubits;
        let memory_pool = Arc::new(MemoryPool::new(
            config.memory_pool_size / 1024,
            1024,
        )?);

        let mut data = Self::allocate_with_layout(size, config.layout, &config)?;

        // Start in the |0...0⟩ state.
        data[0] = Complex64::new(1.0, 0.0);

        Ok(Self {
            data,
            num_qubits,
            layout: config.layout,
            block_size: config.block_size,
            access_pattern: Arc::new(RwLock::new(MemoryAccessPattern::default())),
            bandwidth_monitor: Arc::new(RwLock::new(BandwidthMonitor::default())),
            memory_pool,
            config,
        })
    }

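    /// Allocates backing storage for `size` amplitudes using the requested
    /// layout strategy.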
    fn allocate_with_layout(
        size: usize,
        layout: MemoryLayout,
        config: &MemoryOptimizationConfig,
    ) -> Result<Vec<Complex64>> {
        match layout {
            MemoryLayout::Contiguous => {
                let mut data = Vec::with_capacity(size);
                data.resize(size, Complex64::new(0.0, 0.0));
                Ok(data)
            }
            MemoryLayout::CacheAligned => Self::allocate_cache_aligned(size, config),
            MemoryLayout::Blocked => Self::allocate_blocked(size, config),
            MemoryLayout::Interleaved => Self::allocate_interleaved(size, config),
            MemoryLayout::Hierarchical => Self::allocate_hierarchical(size, config),
            MemoryLayout::Adaptive => {
                // Start cache-aligned; `adapt_memory_layout` may change this later.
                Self::allocate_cache_aligned(size, config)
            }
        }
    }

    fn allocate_cache_aligned(
        size: usize,
        config: &MemoryOptimizationConfig,
    ) -> Result<Vec<Complex64>> {
        let element_size = std::mem::size_of::<Complex64>();
        let elements_per_line = config.cache_line_size / element_size;
        // Round up to a whole number of cache lines.
        let padded_size = ((size + elements_per_line - 1) / elements_per_line) * elements_per_line;

        let mut data = Vec::with_capacity(padded_size);
        data.resize(padded_size, Complex64::new(0.0, 0.0));
        Ok(data)
    }

    fn allocate_blocked(size: usize, config: &MemoryOptimizationConfig) -> Result<Vec<Complex64>> {
        let mut data = Vec::with_capacity(size);
        data.resize(size, Complex64::new(0.0, 0.0));

        let block_size = config.block_size / std::mem::size_of::<Complex64>();
        let num_blocks = (size + block_size - 1) / block_size;

        let mut blocked_data = Vec::with_capacity(size);
        for block_idx in 0..num_blocks {
            let start = block_idx * block_size;
            let end = std::cmp::min(start + block_size, size);

            for i in start..end {
                blocked_data.push(data[i]);
            }
        }

        Ok(blocked_data)
    }

    fn allocate_interleaved(
        size: usize,
        _config: &MemoryOptimizationConfig,
    ) -> Result<Vec<Complex64>> {
        // Interleaving across memory banks is not implemented yet; fall back to
        // a contiguous allocation.
        let mut data = Vec::with_capacity(size);
        data.resize(size, Complex64::new(0.0, 0.0));
        Ok(data)
    }

    fn allocate_hierarchical(
        size: usize,
        config: &MemoryOptimizationConfig,
    ) -> Result<Vec<Complex64>> {
        // Cache-hierarchy-aware blocking is not implemented yet; the cache sizes
        // are computed for future use and the data is allocated contiguously.
        let _l1_elements = config.l1_cache_size / std::mem::size_of::<Complex64>();
        let _l2_elements = config.l2_cache_size / std::mem::size_of::<Complex64>();

        let mut data = Vec::with_capacity(size);
        data.resize(size, Complex64::new(0.0, 0.0));

        Ok(data)
    }

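    /// Applies a 2x2 gate to `target`, dispatching to a layout-specific kernel
    /// and recording the achieved memory bandwidth.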
    pub fn apply_single_qubit_gate_optimized(
        &mut self,
        target: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        let start_time = Instant::now();

        let mask = 1 << target;
        let size = self.data.len();

        match self.layout {
            MemoryLayout::Blocked => {
                self.apply_single_qubit_gate_blocked(target, gate_matrix, mask)?;
            }
            MemoryLayout::CacheAligned => {
                self.apply_single_qubit_gate_cache_aligned(target, gate_matrix, mask)?;
            }
            _ => {
                self.apply_single_qubit_gate_standard(target, gate_matrix, mask)?;
            }
        }

        let elapsed = start_time.elapsed();
        self.update_bandwidth_monitor(size * std::mem::size_of::<Complex64>(), elapsed);

        Ok(())
    }

    fn apply_single_qubit_gate_blocked(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let block_size = self.block_size / std::mem::size_of::<Complex64>();
        let num_blocks = (self.data.len() + block_size - 1) / block_size;

        for block_idx in 0..num_blocks {
            let start = block_idx * block_size;
            let end = std::cmp::min(start + block_size, self.data.len());

            // Touch the next block ahead of time to hide memory latency.
            if self.config.enable_prefetching && block_idx + 1 < num_blocks {
                let next_start = (block_idx + 1) * block_size;
                if next_start < self.data.len() {
                    Self::prefetch_memory(&self.data[next_start]);
                }
            }

            // Visit each amplitude pair exactly once: `i0` has the target bit
            // clear, `i1` is its partner with the target bit set.
            for i0 in start..end {
                if i0 & mask != 0 {
                    continue;
                }
                let i1 = i0 | mask;

                if i1 < self.data.len() {
                    let amp0 = self.data[i0];
                    let amp1 = self.data[i1];

                    self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                    self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_gate_cache_aligned(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let elements_per_line = self.config.cache_line_size / std::mem::size_of::<Complex64>();

        for chunk_start in (0..self.data.len()).step_by(elements_per_line) {
            let chunk_end = std::cmp::min(chunk_start + elements_per_line, self.data.len());

            // Prefetch the next cache line before processing this one.
            if self.config.enable_prefetching && chunk_end < self.data.len() {
                Self::prefetch_memory(&self.data[chunk_end]);
            }

            // Only indices with the target bit clear are updated directly; each
            // pairs with the index that has the target bit set.
            for i0 in chunk_start..chunk_end {
                if i0 & mask != 0 {
                    continue;
                }
                let i1 = i0 | mask;

                if i1 < self.data.len() {
                    let amp0 = self.data[i0];
                    let amp1 = self.data[i1];

                    self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                    self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_gate_standard(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        // Iterate over every basis state with the target bit clear and update it
        // together with its partner state.
        for i0 in 0..self.data.len() {
            if i0 & mask != 0 {
                continue;
            }
            let i1 = i0 | mask;

            if i1 < self.data.len() {
                let amp0 = self.data[i0];
                let amp1 = self.data[i1];

                self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
            }
        }

        Ok(())
    }

    #[inline(always)]
    fn prefetch_memory(addr: &Complex64) {
        // Portable fallback "prefetch": a volatile read pulls the cache line in
        // without relying on architecture-specific intrinsics.
        unsafe {
            let _ = std::ptr::read_volatile(addr as *const _ as *const u8);
        }
    }

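    /// Applies a 4x4 gate on the `control`/`target` pair, dispatching to a
    /// layout-specific kernel and recording the achieved memory bandwidth.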
    pub fn apply_two_qubit_gate_optimized(
        &mut self,
        control: usize,
        target: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        let start_time = Instant::now();

        let control_mask = 1 << control;
        let target_mask = 1 << target;
        let size = self.data.len();

        match self.layout {
            MemoryLayout::Blocked => {
                self.apply_two_qubit_gate_blocked(control_mask, target_mask, gate_matrix)?;
            }
            _ => {
                self.apply_two_qubit_gate_standard(control_mask, target_mask, gate_matrix)?;
            }
        }

        let elapsed = start_time.elapsed();
        self.update_bandwidth_monitor(size * std::mem::size_of::<Complex64>(), elapsed);

        Ok(())
    }

    fn apply_two_qubit_gate_blocked(
        &mut self,
        control_mask: usize,
        target_mask: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        let block_size = self.block_size / std::mem::size_of::<Complex64>();
        let num_blocks = (self.data.len() + block_size - 1) / block_size;

        for block_idx in 0..num_blocks {
            let start = block_idx * block_size;
            let end = std::cmp::min(start + block_size, self.data.len());

            // Touch the next block ahead of time to hide memory latency.
            if self.config.enable_prefetching && block_idx + 1 < num_blocks {
                let next_start = (block_idx + 1) * block_size;
                if next_start < self.data.len() {
                    Self::prefetch_memory(&self.data[next_start]);
                }
            }

            // Visit each group of four amplitudes exactly once, anchored at the
            // index with both the control and target bits clear.
            for i00 in start..end {
                if i00 & (control_mask | target_mask) != 0 {
                    continue;
                }
                let i01 = i00 | target_mask;
                let i10 = i00 | control_mask;
                let i11 = i00 | control_mask | target_mask;

                if i11 < self.data.len() {
                    let amp00 = self.data[i00];
                    let amp01 = self.data[i01];
                    let amp10 = self.data[i10];
                    let amp11 = self.data[i11];

                    self.data[i00] = gate_matrix[[0, 0]] * amp00
                        + gate_matrix[[0, 1]] * amp01
                        + gate_matrix[[0, 2]] * amp10
                        + gate_matrix[[0, 3]] * amp11;
                    self.data[i01] = gate_matrix[[1, 0]] * amp00
                        + gate_matrix[[1, 1]] * amp01
                        + gate_matrix[[1, 2]] * amp10
                        + gate_matrix[[1, 3]] * amp11;
                    self.data[i10] = gate_matrix[[2, 0]] * amp00
                        + gate_matrix[[2, 1]] * amp01
                        + gate_matrix[[2, 2]] * amp10
                        + gate_matrix[[2, 3]] * amp11;
                    self.data[i11] = gate_matrix[[3, 0]] * amp00
                        + gate_matrix[[3, 1]] * amp01
                        + gate_matrix[[3, 2]] * amp10
                        + gate_matrix[[3, 3]] * amp11;
                }
            }
        }

        Ok(())
    }

    fn apply_two_qubit_gate_standard(
        &mut self,
        control_mask: usize,
        target_mask: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        // Anchor each update at the index with both the control and target bits
        // clear so every group of four amplitudes is processed exactly once.
        for i00 in 0..self.data.len() {
            if i00 & (control_mask | target_mask) != 0 {
                continue;
            }
            let i01 = i00 | target_mask;
            let i10 = i00 | control_mask;
            let i11 = i00 | control_mask | target_mask;

            if i11 < self.data.len() {
                let amp00 = self.data[i00];
                let amp01 = self.data[i01];
                let amp10 = self.data[i10];
                let amp11 = self.data[i11];

                self.data[i00] = gate_matrix[[0, 0]] * amp00
                    + gate_matrix[[0, 1]] * amp01
                    + gate_matrix[[0, 2]] * amp10
                    + gate_matrix[[0, 3]] * amp11;
                self.data[i01] = gate_matrix[[1, 0]] * amp00
                    + gate_matrix[[1, 1]] * amp01
                    + gate_matrix[[1, 2]] * amp10
                    + gate_matrix[[1, 3]] * amp11;
                self.data[i10] = gate_matrix[[2, 0]] * amp00
                    + gate_matrix[[2, 1]] * amp01
                    + gate_matrix[[2, 2]] * amp10
                    + gate_matrix[[2, 3]] * amp11;
                self.data[i11] = gate_matrix[[3, 0]] * amp00
                    + gate_matrix[[3, 1]] * amp01
                    + gate_matrix[[3, 2]] * amp10
                    + gate_matrix[[3, 3]] * amp11;
            }
        }

        Ok(())
    }

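    /// Records a bandwidth sample for an operation that touched `bytes_accessed`
    /// bytes in `elapsed`, updating peak, average, and utilization figures.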
    fn update_bandwidth_monitor(&self, bytes_accessed: usize, elapsed: Duration) {
        if let Ok(mut monitor) = self.bandwidth_monitor.write() {
            let bandwidth = bytes_accessed as f64 / elapsed.as_secs_f64();
            let now = Instant::now();

            monitor.bandwidth_samples.push_back((now, bandwidth));

            // Keep only the most recent samples.
            while monitor.bandwidth_samples.len() > 100 {
                monitor.bandwidth_samples.pop_front();
            }

            if bandwidth > monitor.peak_bandwidth {
                monitor.peak_bandwidth = bandwidth;
            }

            let sum: f64 = monitor.bandwidth_samples.iter().map(|(_, bw)| bw).sum();
            monitor.average_bandwidth = sum / monitor.bandwidth_samples.len() as f64;

            // Utilization is measured against an assumed 100 GB/s peak bandwidth.
            let theoretical_max = 100.0 * 1024.0 * 1024.0 * 1024.0;
            monitor.current_utilization = bandwidth / theoretical_max;
        }
    }

    pub fn get_bandwidth_stats(&self) -> Result<BandwidthMonitor> {
        Ok(self.bandwidth_monitor.read().unwrap().clone())
    }

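    /// Re-selects the memory layout from the observed access pattern and
    /// bandwidth utilization. Returns immediately unless the current layout is
    /// `Adaptive`.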
    pub fn adapt_memory_layout(&mut self) -> Result<()> {
        if self.layout != MemoryLayout::Adaptive {
            return Ok(());
        }

        let access_pattern = self.access_pattern.read().unwrap();
        let bandwidth_stats = self.bandwidth_monitor.read().unwrap();

        let sequential_ratio = access_pattern.sequential_accesses.len() as f64
            / (access_pattern.total_accesses as f64 + 1.0);

        let new_layout = if sequential_ratio > 0.8 {
            MemoryLayout::CacheAligned
        } else if bandwidth_stats.current_utilization < 0.5 {
            MemoryLayout::Blocked
        } else {
            MemoryLayout::Hierarchical
        };

        if new_layout != self.layout {
            let new_data = Self::allocate_with_layout(self.data.len(), new_layout, &self.config)?;
            self.data = new_data;
            self.layout = new_layout;
        }

        Ok(())
    }

    pub fn get_memory_stats(&self) -> MemoryStats {
        let element_size = std::mem::size_of::<Complex64>();
        MemoryStats {
            total_memory: self.data.len() * element_size,
            allocated_memory: self.data.capacity() * element_size,
            layout: self.layout,
            cache_efficiency: self.calculate_cache_efficiency(),
            memory_utilization: self.calculate_memory_utilization(),
        }
    }

    fn calculate_cache_efficiency(&self) -> f64 {
        let access_pattern = self.access_pattern.read().unwrap();
        if access_pattern.total_accesses == 0 {
            return 1.0;
        }

        let hit_rate =
            1.0 - (access_pattern.cache_misses as f64 / access_pattern.total_accesses as f64);
        hit_rate.max(0.0).min(1.0)
    }

    fn calculate_memory_utilization(&self) -> f64 {
        let bandwidth_stats = self.bandwidth_monitor.read().unwrap();
        bandwidth_stats.current_utilization
    }

    pub fn data(&self) -> &[Complex64] {
        &self.data
    }

    pub fn data_mut(&mut self) -> &mut [Complex64] {
        if let Ok(mut pattern) = self.access_pattern.write() {
            pattern.total_accesses += 1;
            pattern.last_access_time = Instant::now();
        }

        &mut self.data
    }
}

#[derive(Debug, Clone)]
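/// Snapshot of memory usage and efficiency for an `OptimizedStateVector`.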
pub struct MemoryStats {
    pub total_memory: usize,
    pub allocated_memory: usize,
    pub layout: MemoryLayout,
    pub cache_efficiency: f64,
    pub memory_utilization: f64,
}

#[derive(Debug)]
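/// High-level entry point that creates optimized state vectors and tunes their
/// memory access for a given circuit.
///
/// A minimal usage sketch (mirroring the tests below):
///
/// ```ignore
/// let config = MemoryOptimizationConfig::default();
/// let optimizer = MemoryBandwidthOptimizer::new(config)?;
/// let mut state = optimizer.create_optimized_state_vector(4)?;
/// let report = optimizer.optimize_circuit_memory_access(&mut state, 10)?;
/// println!("layout after optimization: {:?}", report.memory_stats.layout);
/// ```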
pub struct MemoryBandwidthOptimizer {
    config: MemoryOptimizationConfig,
    memory_pool: Arc<MemoryPool>,
    backend: Option<SciRS2Backend>,
}

impl MemoryBandwidthOptimizer {
    pub fn new(config: MemoryOptimizationConfig) -> Result<Self> {
        let memory_pool = Arc::new(MemoryPool::new(config.memory_pool_size / 1024, 1024)?);

        Ok(Self {
            config,
            memory_pool,
            backend: None,
        })
    }

    pub fn init_scirs2_backend(&mut self) -> Result<()> {
        let backend = SciRS2Backend::new();
        self.backend = Some(backend);
        Ok(())
    }

    pub fn create_optimized_state_vector(&self, num_qubits: usize) -> Result<OptimizedStateVector> {
        OptimizedStateVector::new(num_qubits, self.config.clone())
    }

    pub fn optimize_circuit_memory_access(
        &self,
        state_vector: &mut OptimizedStateVector,
        circuit_depth: usize,
    ) -> Result<MemoryOptimizationReport> {
        let start_time = Instant::now();

        let estimated_accesses = circuit_depth * state_vector.data.len();

        state_vector.adapt_memory_layout()?;

        if self.config.enable_prefetching {
            Self::warmup_caches(state_vector)?;
        }

        let optimization_time = start_time.elapsed();

        Ok(MemoryOptimizationReport {
            optimization_time,
            estimated_memory_accesses: estimated_accesses,
            cache_warmup_performed: self.config.enable_prefetching,
            layout_adaptation_performed: true,
            memory_stats: state_vector.get_memory_stats(),
        })
    }

    fn warmup_caches(state_vector: &OptimizedStateVector) -> Result<()> {
        let chunk_size = state_vector.config.cache_line_size / std::mem::size_of::<Complex64>();

        for chunk_start in (0..state_vector.data.len()).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, state_vector.data.len());

            // Read a few elements per cache line so it is resident before the
            // first gate is applied; the step is clamped to at least 1.
            for i in (chunk_start..chunk_end).step_by((chunk_size / 4).max(1)) {
                let _ = state_vector.data[i];
            }
        }

        Ok(())
    }
}

#[derive(Debug, Clone)]
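/// Summary of the work performed by `optimize_circuit_memory_access`.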
pub struct MemoryOptimizationReport {
    pub optimization_time: Duration,
    pub estimated_memory_accesses: usize,
    pub cache_warmup_performed: bool,
    pub layout_adaptation_performed: bool,
    pub memory_stats: MemoryStats,
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_optimized_state_vector_creation() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(3, config).unwrap();

        assert_eq!(state_vector.num_qubits, 3);
        assert_eq!(state_vector.data.len(), 8);
        assert_eq!(state_vector.data[0], Complex64::new(1.0, 0.0));
    }

    #[test]
    fn test_memory_layouts() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::CacheAligned,
            ..Default::default()
        };

        let state_vector = OptimizedStateVector::new(4, config).unwrap();
        assert_eq!(state_vector.layout, MemoryLayout::CacheAligned);
    }

    #[test]
    fn test_single_qubit_gate_optimization() {
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(2, config).unwrap();

        // Pauli-X gate.
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(0.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(0.0, 0.0),
            ],
        )
        .unwrap();

        state_vector
            .apply_single_qubit_gate_optimized(0, &gate_matrix)
            .unwrap();

        // Applying X to qubit 0 moves the amplitude from index 0 to index 1.
        assert!((state_vector.data[1].re - 1.0).abs() < 1e-10);
        assert!(state_vector.data[0].re.abs() < 1e-10);
    }

    #[test]
    fn test_bandwidth_monitoring() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(3, config).unwrap();

        let stats = state_vector.get_bandwidth_stats().unwrap();
        assert_eq!(stats.bandwidth_samples.len(), 0);
    }

    #[test]
    fn test_memory_pool() {
        let pool = MemoryPool::new(1024, 10).unwrap();

        let ptr1 = pool.allocate().unwrap();
        let ptr2 = pool.allocate().unwrap();

        pool.deallocate(ptr1).unwrap();
        pool.deallocate(ptr2).unwrap();
    }

    #[test]
    fn test_cache_aligned_allocation() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::CacheAligned,
            cache_line_size: 64,
            ..Default::default()
        };

        let data = OptimizedStateVector::allocate_cache_aligned(100, &config).unwrap();

        // The allocation is padded up to a whole number of cache lines.
        let element_size = std::mem::size_of::<Complex64>();
        let elements_per_line = config.cache_line_size / element_size;
        let expected_padded =
            ((100 + elements_per_line - 1) / elements_per_line) * elements_per_line;

        assert_eq!(data.len(), expected_padded);
    }

    #[test]
    fn test_memory_bandwidth_optimizer() {
        let config = MemoryOptimizationConfig::default();
        let optimizer = MemoryBandwidthOptimizer::new(config).unwrap();

        let mut state_vector = optimizer.create_optimized_state_vector(4).unwrap();
        let report = optimizer
            .optimize_circuit_memory_access(&mut state_vector, 10)
            .unwrap();

        assert!(report.optimization_time.as_millis() < u128::MAX);
        assert_eq!(report.estimated_memory_accesses, 10 * 16);
    }

    #[test]
    fn test_adaptive_layout() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::Adaptive,
            ..Default::default()
        };

        let mut state_vector = OptimizedStateVector::new(3, config).unwrap();
        state_vector.adapt_memory_layout().unwrap();

        assert!(matches!(
            state_vector.layout,
            MemoryLayout::CacheAligned | MemoryLayout::Blocked | MemoryLayout::Hierarchical
        ));
    }

    #[test]
    fn test_memory_stats() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(4, config).unwrap();

        let stats = state_vector.get_memory_stats();
        assert_eq!(stats.total_memory, 16 * std::mem::size_of::<Complex64>());
        assert!(stats.cache_efficiency >= 0.0 && stats.cache_efficiency <= 1.0);
    }

    #[test]
    fn test_blocked_layout_allocation() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::Blocked,
            block_size: 1024,
            ..Default::default()
        };

        let data = OptimizedStateVector::allocate_blocked(100, &config).unwrap();
        assert_eq!(data.len(), 100);
    }

    #[test]
    fn test_prefetch_functionality() {
        let config = MemoryOptimizationConfig {
            enable_prefetching: true,
            prefetch_distance: 4,
            ..Default::default()
        };

        let state_vector = OptimizedStateVector::new(5, config).unwrap();

        // Should complete without touching invalid memory.
        OptimizedStateVector::prefetch_memory(&state_vector.data[0]);
    }
}