use scirs2_core::ndarray::Array2;
use scirs2_core::Complex64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use crate::error::{Result, SimulatorError};
use crate::memory_bandwidth_optimization::MemoryOptimizationConfig;

/// Description of the CPU cache hierarchy used to guide layout decisions.
#[derive(Debug, Clone)]
pub struct CacheHierarchyConfig {
    /// L1 data cache size in bytes
    pub l1_size: usize,
    /// L1 cache line size in bytes
    pub l1_line_size: usize,
    /// L1 cache associativity
    pub l1_associativity: usize,
    /// L2 cache size in bytes
    pub l2_size: usize,
    /// L2 cache line size in bytes
    pub l2_line_size: usize,
    /// L2 cache associativity
    pub l2_associativity: usize,
    /// L3 cache size in bytes
    pub l3_size: usize,
    /// L3 cache line size in bytes
    pub l3_line_size: usize,
    /// L3 cache associativity
    pub l3_associativity: usize,
    /// Main-memory access latency (in cycles)
    pub memory_latency: usize,
    /// Replacement policy assumed when modeling cache behavior
    pub replacement_policy: CacheReplacementPolicy,
}

impl Default for CacheHierarchyConfig {
    fn default() -> Self {
        Self {
            l1_size: 32 * 1024,
            l1_line_size: 64,
            l1_associativity: 8,
            l2_size: 256 * 1024,
            l2_line_size: 64,
            l2_associativity: 8,
            l3_size: 8 * 1024 * 1024,
            l3_line_size: 64,
            l3_associativity: 16,
            memory_latency: 300,
            replacement_policy: CacheReplacementPolicy::LRU,
        }
    }
}

/// Cache replacement policy assumed by the access model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheReplacementPolicy {
    /// Least recently used
    LRU,
    /// First in, first out
    FIFO,
    /// Random replacement
    Random,
    /// Least frequently used
    LFU,
}

/// Memory layout strategies for cache-friendly state-vector storage.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheOptimizedLayout {
    /// Standard linear (row-major) layout
    Linear,
    /// Cache-line-sized blocks
    Blocked,
    /// Z-order (Morton) curve layout
    ZOrder,
    /// Hilbert curve layout
    Hilbert,
    /// Bit-reversal permutation layout
    BitReversal,
    /// Fixed-stride grouping layout
    Strided,
    /// Hierarchical blocking tuned to L1/L2 block sizes
    Hierarchical,
}

/// Observed memory access pattern used for cache performance analysis.
#[derive(Debug, Clone)]
pub struct CacheAccessPattern {
    /// Number of accesses recorded per cache line
    pub line_access_counts: HashMap<usize, u64>,
    /// Modeled cache hits
    pub cache_hits: u64,
    /// Modeled cache misses
    pub cache_misses: u64,
    /// Sliding window of recently accessed physical indices
    pub access_sequence: VecDeque<usize>,
    /// Strides detected in the recent access sequence
    pub detected_strides: Vec<isize>,
    /// Estimated temporal locality in [0, 1]
    pub temporal_locality: f64,
    /// Estimated spatial locality in [0, 1]
    pub spatial_locality: f64,
}

impl Default for CacheAccessPattern {
    fn default() -> Self {
        Self {
            line_access_counts: HashMap::new(),
            cache_hits: 0,
            cache_misses: 0,
            access_sequence: VecDeque::new(),
            detected_strides: Vec::new(),
            temporal_locality: 0.0,
            spatial_locality: 0.0,
        }
    }
}

/// State vector whose storage order is chosen to improve cache behavior.
#[derive(Debug)]
pub struct CacheOptimizedStateVector {
    /// Amplitudes stored in the physical (layout-permuted) order
    data: Vec<Complex64>,
    /// Number of qubits represented
    num_qubits: usize,
    /// Active memory layout
    layout: CacheOptimizedLayout,
    /// Cache hierarchy description
    cache_config: CacheHierarchyConfig,
    /// Memory bandwidth optimization settings
    memory_config: MemoryOptimizationConfig,
    /// Shared record of observed accesses
    access_pattern: Arc<Mutex<CacheAccessPattern>>,
    /// Maps physical position -> logical index
    layout_indices: Vec<usize>,
    /// Maps logical index -> physical position
    inverse_indices: Vec<usize>,
}

impl CacheOptimizedStateVector {
    pub fn new(
        num_qubits: usize,
        layout: CacheOptimizedLayout,
        cache_config: CacheHierarchyConfig,
        memory_config: MemoryOptimizationConfig,
    ) -> Result<Self> {
        let size = 1 << num_qubits;

        let (layout_indices, inverse_indices) = Self::generate_layout_indices(size, layout)?;

        let mut data = vec![Complex64::new(0.0, 0.0); size];
        // Initialize to the |0...0⟩ computational basis state
        data[0] = Complex64::new(1.0, 0.0);

        let mut instance = Self {
            data,
            num_qubits,
            layout,
            cache_config,
            memory_config,
            access_pattern: Arc::new(Mutex::new(CacheAccessPattern::default())),
            layout_indices,
            inverse_indices,
        };

        instance.apply_layout_transformation()?;

        Ok(instance)
    }

    fn generate_layout_indices(
        size: usize,
        layout: CacheOptimizedLayout,
    ) -> Result<(Vec<usize>, Vec<usize>)> {
        let indices = match layout {
            CacheOptimizedLayout::Linear => (0..size).collect::<Vec<usize>>(),
            CacheOptimizedLayout::Blocked => Self::generate_blocked_indices(size)?,
            CacheOptimizedLayout::ZOrder => Self::generate_z_order_indices(size)?,
            CacheOptimizedLayout::Hilbert => Self::generate_hilbert_indices(size)?,
            CacheOptimizedLayout::BitReversal => Self::generate_bit_reversal_indices(size)?,
            CacheOptimizedLayout::Strided => Self::generate_strided_indices(size)?,
            CacheOptimizedLayout::Hierarchical => Self::generate_hierarchical_indices(size)?,
        };

        let mut inverse_indices = vec![0; size];
        for (new_idx, &old_idx) in indices.iter().enumerate() {
            inverse_indices[old_idx] = new_idx;
        }

        Ok((indices, inverse_indices))
    }

    fn generate_blocked_indices(size: usize) -> Result<Vec<usize>> {
        // Number of Complex64 elements that fit in a 64-byte cache line
        let block_size = 64 / std::mem::size_of::<Complex64>();
        let mut indices = Vec::with_capacity(size);

        for block_start in (0..size).step_by(block_size) {
            let block_end = std::cmp::min(block_start + block_size, size);
            for i in block_start..block_end {
                indices.push(i);
            }
        }

        Ok(indices)
    }

    fn generate_z_order_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Z-order layout requires power-of-2 size".to_string(),
            ));
        }

        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let morton_index = Self::morton_encode_2d(i, bits / 2);
            indices.push(morton_index % size);
        }

        Ok(indices)
    }

    fn morton_encode_2d(index: usize, bits_per_dim: usize) -> usize {
        // Split the index into two coordinates, then interleave their bits
        let x = index & ((1 << bits_per_dim) - 1);
        let y = index >> bits_per_dim;

        let mut result = 0;
        for i in 0..bits_per_dim {
            result |= ((x >> i) & 1) << (2 * i);
            result |= ((y >> i) & 1) << (2 * i + 1);
        }

        result
    }

    fn generate_hilbert_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Hilbert layout requires power-of-2 size".to_string(),
            ));
        }

        let order = bits / 2;
        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let (x, y) = Self::hilbert_index_to_xy(i, order);
            let linear_index = y * (1 << order) + x;
            indices.push(linear_index % size);
        }

        Ok(indices)
    }

    fn hilbert_index_to_xy(mut index: usize, order: usize) -> (usize, usize) {
        let mut x = 0;
        let mut y = 0;

        for s in (0..order).rev() {
            let rx = 1 & (index >> 1);
            let ry = 1 & (index ^ rx);

            if ry == 0 {
                if rx == 1 {
                    let max_val = (1usize << s).saturating_sub(1);
                    x = max_val.saturating_sub(x);
                    y = max_val.saturating_sub(y);
                }
                std::mem::swap(&mut x, &mut y);
            }

            x += rx << s;
            y += ry << s;
            index >>= 2;
        }

        (x, y)
    }

    fn generate_bit_reversal_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Bit-reversal layout requires power-of-2 size".to_string(),
            ));
        }

        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let reversed = Self::reverse_bits(i, bits);
            indices.push(reversed);
        }

        Ok(indices)
    }

    fn reverse_bits(mut num: usize, bits: usize) -> usize {
        let mut result = 0;
        for _ in 0..bits {
            result = (result << 1) | (num & 1);
            num >>= 1;
        }
        result
    }

    fn generate_strided_indices(size: usize) -> Result<Vec<usize>> {
        // Fixed stride of 8 elements per group
        let stride = 8;
        let mut indices = Vec::with_capacity(size);

        for group in 0..size.div_ceil(stride) {
            for offset in 0..stride {
                let index = group * stride + offset;
                if index < size {
                    indices.push(index);
                }
            }
        }

        Ok(indices)
    }

    fn generate_hierarchical_indices(size: usize) -> Result<Vec<usize>> {
        // Small blocks nested inside larger blocks (sizes in Complex64 elements)
        let l1_block_size = 32 / std::mem::size_of::<Complex64>();
        let l2_block_size = 256 / std::mem::size_of::<Complex64>();
        let mut indices = Vec::with_capacity(size);

        for l2_block in 0..size.div_ceil(l2_block_size) {
            let l2_start = l2_block * l2_block_size;
            let l2_end = std::cmp::min(l2_start + l2_block_size, size);

            for l1_block_start in (l2_start..l2_end).step_by(l1_block_size) {
                let l1_block_end = std::cmp::min(l1_block_start + l1_block_size, l2_end);

                for i in l1_block_start..l1_block_end {
                    indices.push(i);
                }
            }
        }

        Ok(indices)
    }

    fn apply_layout_transformation(&mut self) -> Result<()> {
        let original_data = self.data.clone();

        for (new_idx, &old_idx) in self.layout_indices.iter().enumerate() {
            self.data[new_idx] = original_data[old_idx];
        }

        Ok(())
    }

    fn logical_to_physical(&self, logical_index: usize) -> usize {
        self.inverse_indices[logical_index]
    }

    fn physical_to_logical(&self, physical_index: usize) -> usize {
        self.layout_indices[physical_index]
    }

    pub fn apply_single_qubit_gate_cache_optimized(
        &mut self,
        target: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        let start_time = Instant::now();
        let mask = 1 << target;

        match self.layout {
            CacheOptimizedLayout::Blocked => {
                self.apply_single_qubit_blocked(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::ZOrder => {
                self.apply_single_qubit_z_order(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::Hilbert => {
                self.apply_single_qubit_hilbert(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::Strided => {
                self.apply_single_qubit_strided(target, gate_matrix, mask)?;
            }
            _ => {
                self.apply_single_qubit_linear(target, gate_matrix, mask)?;
            }
        }

        self.update_access_statistics(start_time.elapsed());

        Ok(())
    }

    fn apply_single_qubit_blocked(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let block_size = self.cache_config.l1_line_size / std::mem::size_of::<Complex64>();

        for block_start in (0..self.data.len()).step_by(block_size) {
            let block_end = std::cmp::min(block_start + block_size, self.data.len());

            // Prefetch the next block into L1 while processing the current one
            if block_end < self.data.len() {
                #[cfg(target_arch = "x86_64")]
                unsafe {
                    std::arch::x86_64::_mm_prefetch(
                        self.data.as_ptr().add(block_end) as *const i8,
                        std::arch::x86_64::_MM_HINT_T0,
                    );
                }
            }

            for i in (block_start..block_end).step_by(2) {
                if i + 1 < self.data.len() {
                    let logical_i0 = self.physical_to_logical(i);
                    let logical_i1 = logical_i0 | mask;
                    let physical_i1 = self.logical_to_physical(logical_i1);

                    if physical_i1 < self.data.len() {
                        let amp0 = self.data[i];
                        let amp1 = self.data[physical_i1];

                        self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                        self.data[physical_i1] =
                            gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                        self.track_cache_access(i);
                        self.track_cache_access(physical_i1);
                    }
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_z_order(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let bits = self.num_qubits;
        // Process the 2-D index space in small tiles
        let tile_size = 4;

        for tile_y in (0..(1 << (bits / 2))).step_by(tile_size) {
            for tile_x in (0..(1 << (bits / 2))).step_by(tile_size) {
                for y in tile_y..std::cmp::min(tile_y + tile_size, 1 << (bits / 2)) {
                    for x in tile_x..std::cmp::min(tile_x + tile_size, 1 << (bits / 2)) {
                        let logical_index = y * (1 << (bits / 2)) + x;
                        let logical_i0 = logical_index & !mask;
                        let logical_i1 = logical_i0 | mask;

                        let physical_i0 = self.logical_to_physical(logical_i0);
                        let physical_i1 = self.logical_to_physical(logical_i1);

                        if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
                            let amp0 = self.data[physical_i0];
                            let amp1 = self.data[physical_i1];

                            self.data[physical_i0] =
                                gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                            self.data[physical_i1] =
                                gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                            self.track_cache_access(physical_i0);
                            self.track_cache_access(physical_i1);
                        }
                    }
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_hilbert(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let order = self.num_qubits / 2;
        let curve_length = 1 << self.num_qubits;

        for hilbert_index in (0..curve_length).step_by(2) {
            let (x, y) = Self::hilbert_index_to_xy(hilbert_index, order);
            let logical_index = y * (1 << order) + x;

            let logical_i0 = logical_index & !mask;
            let logical_i1 = logical_i0 | mask;

            let physical_i0 = self.logical_to_physical(logical_i0);
            let physical_i1 = self.logical_to_physical(logical_i1);

            if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
                let amp0 = self.data[physical_i0];
                let amp1 = self.data[physical_i1];

                self.data[physical_i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                self.track_cache_access(physical_i0);
                self.track_cache_access(physical_i1);
            }
        }

        Ok(())
    }

    fn apply_single_qubit_strided(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        // Process the state vector in groups of `stride` elements
        let stride = 8;

        for group_start in (0..self.data.len()).step_by(stride * 2) {
            for offset in 0..stride {
                let i = group_start + offset;
                if i + stride < self.data.len() {
                    let logical_i0 = self.physical_to_logical(i);
                    let logical_i1 = logical_i0 | mask;
                    let physical_i1 = self.logical_to_physical(logical_i1);

                    if physical_i1 < self.data.len() {
                        let amp0 = self.data[i];
                        let amp1 = self.data[physical_i1];

                        self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                        self.data[physical_i1] =
                            gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                        self.track_cache_access(i);
                        self.track_cache_access(physical_i1);
                    }
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_linear(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        for i in (0..self.data.len()).step_by(2) {
            let logical_i0 = self.physical_to_logical(i);
            let logical_i1 = logical_i0 | mask;
            let physical_i1 = self.logical_to_physical(logical_i1);

            if physical_i1 < self.data.len() {
                let amp0 = self.data[i];
                let amp1 = self.data[physical_i1];

                self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                self.track_cache_access(i);
                self.track_cache_access(physical_i1);
            }
        }

        Ok(())
    }

    fn track_cache_access(&self, physical_index: usize) {
        if let Ok(mut pattern) = self.access_pattern.lock() {
            let cache_line = physical_index
                / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());

            *pattern.line_access_counts.entry(cache_line).or_insert(0) += 1;

            // Keep a bounded sliding window of recent accesses
            pattern.access_sequence.push_back(physical_index);
            if pattern.access_sequence.len() > 1000 {
                pattern.access_sequence.pop_front();
            }

            // A line that has been touched before counts as a modeled hit
            if pattern.line_access_counts.get(&cache_line).copied().unwrap_or(0) > 1 {
                pattern.cache_hits += 1;
            } else {
                pattern.cache_misses += 1;
            }
        }
    }

    fn update_access_statistics(&self, _operation_time: Duration) {
        if let Ok(mut pattern) = self.access_pattern.lock() {
            // Temporal locality: fraction of accesses that were modeled hits
            let total_accesses = pattern.cache_hits + pattern.cache_misses;
            if total_accesses > 0 {
                pattern.temporal_locality = pattern.cache_hits as f64 / total_accesses as f64;
            }

            // Spatial locality: fraction of consecutive accesses landing on the
            // same or an adjacent cache line
            if pattern.access_sequence.len() > 1 {
                let mut spatial_hits = 0;
                let mut total_pairs = 0;

                for window in pattern.access_sequence.as_slices().0.windows(2) {
                    if let [addr1, addr2] = window {
                        let line1 = addr1
                            / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());
                        let line2 = addr2
                            / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());

                        if line1 == line2 || line1.abs_diff(line2) <= 1 {
                            spatial_hits += 1;
                        }
                        total_pairs += 1;
                    }
                }

                if total_pairs > 0 {
                    pattern.spatial_locality = f64::from(spatial_hits) / f64::from(total_pairs);
                }
            }

            // Look for regular strides in the most recent accesses
            if pattern.access_sequence.len() >= 3 {
                let recent_accesses: Vec<_> =
                    pattern.access_sequence.iter().rev().take(10).collect();
                pattern.detected_strides = Self::detect_strides(&recent_accesses);
            }
        }
    }

    fn detect_strides(accesses: &[&usize]) -> Vec<isize> {
        let mut strides = Vec::new();

        if accesses.len() >= 3 {
            for window in accesses.windows(3) {
                if let [addr1, addr2, addr3] = window {
                    let stride1 = **addr2 as isize - **addr1 as isize;
                    let stride2 = **addr3 as isize - **addr2 as isize;

                    if stride1 == stride2 && stride1 != 0 {
                        strides.push(stride1);
                    }
                }
            }
        }

        strides
    }

    pub fn get_cache_stats(&self) -> Result<CachePerformanceStats> {
        let pattern = self.access_pattern.lock().map_err(|e| {
            SimulatorError::InvalidOperation(format!("Failed to acquire access pattern lock: {e}"))
        })?;
        let total_accesses = pattern.cache_hits + pattern.cache_misses;

        Ok(CachePerformanceStats {
            cache_hit_rate: if total_accesses > 0 {
                pattern.cache_hits as f64 / total_accesses as f64
            } else {
                0.0
            },
            cache_miss_rate: if total_accesses > 0 {
                pattern.cache_misses as f64 / total_accesses as f64
            } else {
                0.0
            },
            temporal_locality: pattern.temporal_locality,
            spatial_locality: pattern.spatial_locality,
            total_cache_lines_accessed: pattern.line_access_counts.len(),
            average_accesses_per_line: if pattern.line_access_counts.is_empty() {
                0.0
            } else {
                pattern.line_access_counts.values().sum::<u64>() as f64
                    / pattern.line_access_counts.len() as f64
            },
            detected_strides: pattern.detected_strides.clone(),
            current_layout: self.layout,
        })
    }

    pub fn adapt_cache_layout(&mut self) -> Result<CacheLayoutAdaptationResult> {
        let stats = self.get_cache_stats()?;
        let current_performance = stats.cache_hit_rate;

        // Choose a layout based on the observed access characteristics
        let recommended_layout = if stats.spatial_locality > 0.8 {
            CacheOptimizedLayout::Blocked
        } else if stats.detected_strides.iter().any(|&s| s.abs() == 1) {
            CacheOptimizedLayout::Linear
        } else if stats.detected_strides.len() > 2 {
            CacheOptimizedLayout::Strided
        } else if stats.temporal_locality < 0.5 {
            CacheOptimizedLayout::Hilbert
        } else {
            CacheOptimizedLayout::ZOrder
        };

        let layout_changed = recommended_layout != self.layout;

        if layout_changed {
            let old_layout = self.layout;

            let (new_indices, new_inverse) =
                Self::generate_layout_indices(self.data.len(), recommended_layout)?;
            self.layout_indices = new_indices;
            self.inverse_indices = new_inverse;
            self.layout = recommended_layout;

            self.apply_layout_transformation()?;

            Ok(CacheLayoutAdaptationResult {
                layout_changed: true,
                old_layout,
                new_layout: recommended_layout,
                performance_before: current_performance,
                expected_performance_improvement: 0.1, // heuristic estimate
                adaptation_overhead: Duration::from_millis(1), // approximate cost
            })
        } else {
            Ok(CacheLayoutAdaptationResult {
                layout_changed: false,
                old_layout: self.layout,
                new_layout: self.layout,
                performance_before: current_performance,
                expected_performance_improvement: 0.0,
                adaptation_overhead: Duration::from_nanos(0),
            })
        }
    }

    #[must_use]
    pub fn data(&self) -> &[Complex64] {
        &self.data
    }

    #[must_use]
    pub const fn layout(&self) -> CacheOptimizedLayout {
        self.layout
    }

    #[must_use]
    pub const fn num_qubits(&self) -> usize {
        self.num_qubits
    }
}

/// Summary of modeled cache performance for the current access history.
#[derive(Debug, Clone)]
pub struct CachePerformanceStats {
    /// Fraction of accesses that were modeled cache hits
    pub cache_hit_rate: f64,
    /// Fraction of accesses that were modeled cache misses
    pub cache_miss_rate: f64,
    /// Estimated temporal locality in [0, 1]
    pub temporal_locality: f64,
    /// Estimated spatial locality in [0, 1]
    pub spatial_locality: f64,
    /// Number of distinct cache lines touched
    pub total_cache_lines_accessed: usize,
    /// Mean number of accesses per touched cache line
    pub average_accesses_per_line: f64,
    /// Strides detected in the recent access sequence
    pub detected_strides: Vec<isize>,
    /// Layout in use when the statistics were collected
    pub current_layout: CacheOptimizedLayout,
}

/// Outcome of an adaptive layout change.
#[derive(Debug, Clone)]
pub struct CacheLayoutAdaptationResult {
    /// Whether the layout was actually switched
    pub layout_changed: bool,
    /// Layout before adaptation
    pub old_layout: CacheOptimizedLayout,
    /// Layout after adaptation
    pub new_layout: CacheOptimizedLayout,
    /// Cache hit rate measured before adaptation
    pub performance_before: f64,
    /// Estimated relative improvement expected from the new layout
    pub expected_performance_improvement: f64,
    /// Time spent performing the adaptation
    pub adaptation_overhead: Duration,
}

/// Executes gates on cache-optimized state vectors and records per-gate statistics.
#[derive(Debug)]
pub struct CacheOptimizedGateManager {
    /// Cache hierarchy description used for optimization decisions
    cache_config: CacheHierarchyConfig,
    /// Statistics collected per gate name
    operation_stats: HashMap<String, CacheOperationStats>,
}

impl CacheOptimizedGateManager {
    #[must_use]
    pub fn new(cache_config: CacheHierarchyConfig) -> Self {
        Self {
            cache_config,
            operation_stats: HashMap::new(),
        }
    }

    pub fn execute_gate(
        &mut self,
        state_vector: &mut CacheOptimizedStateVector,
        gate_name: &str,
        target_qubits: &[usize],
        gate_matrix: &Array2<Complex64>,
    ) -> Result<CacheOperationStats> {
        let start_time = Instant::now();

        match target_qubits.len() {
            1 => {
                state_vector
                    .apply_single_qubit_gate_cache_optimized(target_qubits[0], gate_matrix)?;
            }
            2 => {
                return Err(SimulatorError::NotImplemented(
                    "Two-qubit cache-optimized gates not implemented".to_string(),
                ));
            }
            _ => {
                return Err(SimulatorError::InvalidParameter(
                    "Only single and two-qubit gates supported".to_string(),
                ));
            }
        }

        let execution_time = start_time.elapsed();
        let cache_stats = state_vector.get_cache_stats()?;

        let operation_stats = CacheOperationStats {
            gate_name: gate_name.to_string(),
            execution_time,
            cache_hit_rate: cache_stats.cache_hit_rate,
            spatial_locality: cache_stats.spatial_locality,
            temporal_locality: cache_stats.temporal_locality,
            memory_accesses: cache_stats.total_cache_lines_accessed,
        };

        self.operation_stats
            .insert(gate_name.to_string(), operation_stats.clone());

        Ok(operation_stats)
    }

    #[must_use]
    pub fn get_operation_statistics(&self) -> HashMap<String, CacheOperationStats> {
        self.operation_stats.clone()
    }
}

/// Per-gate execution and cache statistics.
#[derive(Debug, Clone)]
pub struct CacheOperationStats {
    /// Name of the executed gate
    pub gate_name: String,
    /// Wall-clock execution time
    pub execution_time: Duration,
    /// Modeled cache hit rate after the operation
    pub cache_hit_rate: f64,
    /// Estimated spatial locality after the operation
    pub spatial_locality: f64,
    /// Estimated temporal locality after the operation
    pub temporal_locality: f64,
    /// Number of distinct cache lines touched
    pub memory_accesses: usize,
}

#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    #[test]
    fn test_cache_optimized_state_vector_creation() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .expect("Failed to create cache-optimized state vector");

        assert_eq!(state_vector.num_qubits(), 3);
        assert_eq!(state_vector.data().len(), 8);
        assert_eq!(state_vector.layout(), CacheOptimizedLayout::Blocked);
    }

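    // A minimal sketch of overriding the default cache description with struct
    // update syntax; the 48 KiB / 12-way figures below are illustrative values,
    // not measurements of any particular CPU.
    #[test]
    fn test_custom_cache_hierarchy_config() {
        let cache_config = CacheHierarchyConfig {
            l1_size: 48 * 1024,
            l1_associativity: 12,
            ..Default::default()
        };
        assert_eq!(cache_config.l1_size, 48 * 1024);
        assert_eq!(cache_config.l2_size, 256 * 1024);
        assert_eq!(cache_config.replacement_policy, CacheReplacementPolicy::LRU);

        let memory_config = MemoryOptimizationConfig::default();
        let state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector with custom cache config");
        assert_eq!(state_vector.data().len(), 4);
    }
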
    #[test]
    fn test_blocked_layout_indices() {
        let indices = CacheOptimizedStateVector::generate_blocked_indices(16)
            .expect("Failed to generate blocked indices");
        assert_eq!(indices.len(), 16);

        // The generated indices should be a permutation of 0..16
        let mut sorted_indices = indices;
        sorted_indices.sort_unstable();
        assert_eq!(sorted_indices, (0..16).collect::<Vec<_>>());
    }

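    // Added sanity check, assuming only the index generators defined above: the
    // strided and hierarchical orderings should each visit every element exactly
    // once, i.e. form a permutation of 0..size.
    #[test]
    fn test_strided_and_hierarchical_indices_are_permutations() {
        let generated = vec![
            CacheOptimizedStateVector::generate_strided_indices(32)
                .expect("Failed to generate strided indices"),
            CacheOptimizedStateVector::generate_hierarchical_indices(32)
                .expect("Failed to generate hierarchical indices"),
        ];

        for indices in generated {
            let mut sorted = indices;
            sorted.sort_unstable();
            assert_eq!(sorted, (0..32).collect::<Vec<_>>());
        }
    }
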
    #[test]
    fn test_z_order_layout() {
        let indices = CacheOptimizedStateVector::generate_z_order_indices(16)
            .expect("Failed to generate Z-order indices");
        assert_eq!(indices.len(), 16);
    }

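    // Added permutation check: for a size that is a power of 4 (16 here, i.e.
    // 2 bits per dimension), the Morton interleaving is a bijection, so the
    // Z-order indices should be a permutation of 0..16.
    #[test]
    fn test_z_order_indices_are_permutation() {
        let mut indices = CacheOptimizedStateVector::generate_z_order_indices(16)
            .expect("Failed to generate Z-order indices");
        indices.sort_unstable();
        assert_eq!(indices, (0..16).collect::<Vec<_>>());
    }
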
    #[test]
    fn test_hilbert_curve_layout() {
        let indices = CacheOptimizedStateVector::generate_hilbert_indices(16)
            .expect("Failed to generate Hilbert curve indices");
        assert_eq!(indices.len(), 16);
    }

    #[test]
    fn test_bit_reversal_layout() {
        let indices = CacheOptimizedStateVector::generate_bit_reversal_indices(8)
            .expect("Failed to generate bit reversal indices");
        assert_eq!(indices.len(), 8);

        assert_eq!(indices[1], 4); // 001 -> 100
        assert_eq!(indices[2], 2); // 010 -> 010
        assert_eq!(indices[3], 6); // 011 -> 110
    }

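    // Added worked examples for the bit-reversal helper: with 3 bits,
    // 0b001 reverses to 0b100 (1 -> 4) and 0b011 reverses to 0b110 (3 -> 6).
    #[test]
    fn test_reverse_bits_values() {
        assert_eq!(CacheOptimizedStateVector::reverse_bits(0, 3), 0);
        assert_eq!(CacheOptimizedStateVector::reverse_bits(1, 3), 4);
        assert_eq!(CacheOptimizedStateVector::reverse_bits(3, 3), 6);
        assert_eq!(CacheOptimizedStateVector::reverse_bits(6, 3), 3);
    }
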
    #[test]
    fn test_single_qubit_gate_application() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        // Pauli-X gate
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(0.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(0.0, 0.0),
            ],
        )
        .expect("Failed to create gate matrix");

        state_vector
            .apply_single_qubit_gate_cache_optimized(0, &gate_matrix)
            .expect("Failed to apply single qubit gate");

        // The access tracker should have recorded at least one cache line
        let cache_stats = state_vector
            .get_cache_stats()
            .expect("Failed to get cache stats");
        assert!(cache_stats.total_cache_lines_accessed > 0);
    }

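    // Added round-trip sketch for the linear fallback path: with the Linear
    // layout and target qubit 0, applying the Pauli-X matrix twice should
    // restore the initial |00⟩ amplitude. Assumes the num_complex-style
    // re/im fields on Complex64.
    #[test]
    fn test_single_qubit_gate_involution_linear() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        let x_gate = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(0.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(0.0, 0.0),
            ],
        )
        .expect("Failed to create gate matrix");

        state_vector
            .apply_single_qubit_gate_cache_optimized(0, &x_gate)
            .expect("Failed to apply first X");
        state_vector
            .apply_single_qubit_gate_cache_optimized(0, &x_gate)
            .expect("Failed to apply second X");

        let amp = state_vector.data()[0];
        assert!((amp.re - 1.0).abs() < 1e-12 && amp.im.abs() < 1e-12);
    }
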
    #[test]
    fn test_cache_statistics() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        let stats = state_vector
            .get_cache_stats()
            .expect("Failed to get cache stats");
        assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
        assert!(stats.spatial_locality >= 0.0 && stats.spatial_locality <= 1.0);
        assert!(stats.temporal_locality >= 0.0 && stats.temporal_locality <= 1.0);
    }

    #[test]
    fn test_layout_adaptation() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        let adaptation_result = state_vector
            .adapt_cache_layout()
            .expect("Failed to adapt cache layout");

        assert!(adaptation_result.performance_before >= 0.0);
        assert!(adaptation_result.adaptation_overhead >= Duration::from_nanos(0));
    }

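    // Added consistency check: after adapt_cache_layout, the state vector's
    // reported layout should match the new_layout recorded in the result, and
    // layout_changed should agree with whether old and new layouts differ.
    #[test]
    fn test_layout_adaptation_consistency() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        let result = state_vector
            .adapt_cache_layout()
            .expect("Failed to adapt cache layout");

        assert_eq!(state_vector.layout(), result.new_layout);
        assert_eq!(result.layout_changed, result.old_layout != result.new_layout);
    }
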
    #[test]
    fn test_cache_optimized_gate_manager() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut manager = CacheOptimizedGateManager::new(cache_config.clone());
        let mut state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .expect("Failed to create state vector");

        // Hadamard gate
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(-1.0 / 2.0_f64.sqrt(), 0.0),
            ],
        )
        .expect("Failed to create gate matrix");

        let stats = manager
            .execute_gate(&mut state_vector, "H", &[0], &gate_matrix)
            .expect("Failed to execute gate");

        assert_eq!(stats.gate_name, "H");
        assert!(stats.execution_time > Duration::from_nanos(0));
        assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
    }

    #[test]
    fn test_morton_encoding() {
        // 5 = 0b0101 splits into x = 1, y = 1, so the result must fit in 4 bits
        let encoded = CacheOptimizedStateVector::morton_encode_2d(5, 2);
        assert!(encoded < 16);
    }

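    // Added worked values for the 2-D Morton encoding with 2 bits per
    // dimension: index 5 splits into x = 1, y = 1, which interleave to
    // 0b0011 = 3; index 3 splits into x = 3, y = 0, which interleave to
    // 0b0101 = 5.
    #[test]
    fn test_morton_encoding_values() {
        assert_eq!(CacheOptimizedStateVector::morton_encode_2d(0, 2), 0);
        assert_eq!(CacheOptimizedStateVector::morton_encode_2d(5, 2), 3);
        assert_eq!(CacheOptimizedStateVector::morton_encode_2d(3, 2), 5);
    }
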
    #[test]
    fn test_hilbert_coordinate_conversion() {
        let (x, y) = CacheOptimizedStateVector::hilbert_index_to_xy(3, 2);
        assert!(x < 4 && y < 4);
    }

    #[test]
    fn test_stride_detection() {
        let accesses = vec![&0, &4, &8, &12, &16];
        let strides = CacheOptimizedStateVector::detect_strides(&accesses);
        assert!(strides.contains(&4));
    }
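
    // Added check that a regular descending access pattern is reported as a
    // negative stride.
    #[test]
    fn test_negative_stride_detection() {
        let accesses = vec![&16, &12, &8, &4];
        let strides = CacheOptimizedStateVector::detect_strides(&accesses);
        assert!(strides.contains(&-4));
    }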
}