//! Cache-optimized state-vector storage and gate application.
//!
//! Amplitudes can be stored in several cache-conscious layouts (blocked, Z-order,
//! Hilbert, bit-reversal, strided, hierarchical). Accesses are tracked so that the
//! layout can be adapted to the observed access pattern.

use ndarray::Array2;
use num_complex::Complex64;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::collections::{HashMap, VecDeque};
use std::sync::{Arc, Mutex};
use std::time::{Duration, Instant};

use crate::error::{Result, SimulatorError};
use crate::memory_bandwidth_optimization::MemoryOptimizationConfig;

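/// Description of the cache hierarchy used to guide data-layout decisions.
/// The values are configuration inputs (assumed, not probed from the host CPU).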
#[derive(Debug, Clone)]
pub struct CacheHierarchyConfig {
    /// L1 data cache size in bytes
    pub l1_size: usize,
    /// L1 cache line size in bytes
    pub l1_line_size: usize,
    /// L1 associativity (ways)
    pub l1_associativity: usize,
    /// L2 cache size in bytes
    pub l2_size: usize,
    /// L2 cache line size in bytes
    pub l2_line_size: usize,
    /// L2 associativity (ways)
    pub l2_associativity: usize,
    /// L3 cache size in bytes
    pub l3_size: usize,
    /// L3 cache line size in bytes
    pub l3_line_size: usize,
    /// L3 associativity (ways)
    pub l3_associativity: usize,
    /// Approximate main-memory access latency used by the model
    pub memory_latency: usize,
    /// Replacement policy assumed by the cache model
    pub replacement_policy: CacheReplacementPolicy,
}

impl Default for CacheHierarchyConfig {
    fn default() -> Self {
        Self {
            l1_size: 32 * 1024,       // 32 KiB L1 data cache
            l1_line_size: 64,
            l1_associativity: 8,
            l2_size: 256 * 1024,      // 256 KiB L2
            l2_line_size: 64,
            l2_associativity: 8,
            l3_size: 8 * 1024 * 1024, // 8 MiB L3
            l3_line_size: 64,
            l3_associativity: 16,
            memory_latency: 300,
            replacement_policy: CacheReplacementPolicy::LRU,
        }
    }
}

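/// Cache replacement policy assumed by the model.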
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheReplacementPolicy {
    /// Least recently used
    LRU,
    /// First in, first out
    FIFO,
    /// Random eviction
    Random,
    /// Least frequently used
    LFU,
}

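/// Physical memory layout used for storing state-vector amplitudes.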
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheOptimizedLayout {
    /// Contiguous layout in basis-state order
    Linear,
    /// Cache-line-sized blocks
    Blocked,
    /// Morton (Z-order) curve ordering
    ZOrder,
    /// Hilbert space-filling curve ordering
    Hilbert,
    /// Bit-reversal permutation
    BitReversal,
    /// Fixed-stride grouping
    Strided,
    /// Nested blocks sized for the L1 and L2 caches
    Hierarchical,
}

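/// Running record of memory accesses used to estimate cache behavior.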
#[derive(Debug, Clone)]
pub struct CacheAccessPattern {
    /// Number of accesses per cache line
    pub line_access_counts: HashMap<usize, u64>,
    /// Accesses that hit an already-touched cache line
    pub cache_hits: u64,
    /// Accesses that touched a cache line for the first time
    pub cache_misses: u64,
    /// Recent physical indices, bounded to the last 1000 accesses
    pub access_sequence: VecDeque<usize>,
    /// Constant strides detected in the recent access sequence
    pub detected_strides: Vec<isize>,
    /// Estimated temporal locality in [0, 1]
    pub temporal_locality: f64,
    /// Estimated spatial locality in [0, 1]
    pub spatial_locality: f64,
}

impl Default for CacheAccessPattern {
    fn default() -> Self {
        Self {
            line_access_counts: HashMap::new(),
            cache_hits: 0,
            cache_misses: 0,
            access_sequence: VecDeque::new(),
            detected_strides: Vec::new(),
            temporal_locality: 0.0,
            spatial_locality: 0.0,
        }
    }
}

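/// A state vector whose amplitudes are stored in a cache-conscious physical layout,
/// with access tracking that can drive adaptive re-layout.
///
/// Illustrative usage (a sketch only; the `ignore` fence keeps it out of doctests):
///
/// ```ignore
/// let state = CacheOptimizedStateVector::new(
///     3,
///     CacheOptimizedLayout::Blocked,
///     CacheHierarchyConfig::default(),
///     MemoryOptimizationConfig::default(),
/// )
/// .unwrap();
/// assert_eq!(state.data().len(), 8);
/// ```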
#[derive(Debug)]
pub struct CacheOptimizedStateVector {
    /// Amplitudes stored in physical (layout) order
    data: Vec<Complex64>,
    /// Number of qubits represented
    num_qubits: usize,
    /// Current physical layout
    layout: CacheOptimizedLayout,
    /// Cache hierarchy description
    cache_config: CacheHierarchyConfig,
    /// Memory optimization settings
    memory_config: MemoryOptimizationConfig,
    /// Shared access-pattern statistics
    access_pattern: Arc<Mutex<CacheAccessPattern>>,
    /// Maps physical index -> logical basis-state index
    layout_indices: Vec<usize>,
    /// Maps logical basis-state index -> physical index
    inverse_indices: Vec<usize>,
}

impl CacheOptimizedStateVector {
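    /// Creates a state vector for `num_qubits` qubits, initialized to |0...0⟩ and
    /// stored in the requested layout.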
    pub fn new(
        num_qubits: usize,
        layout: CacheOptimizedLayout,
        cache_config: CacheHierarchyConfig,
        memory_config: MemoryOptimizationConfig,
    ) -> Result<Self> {
        let size = 1 << num_qubits;

        let (layout_indices, inverse_indices) = Self::generate_layout_indices(size, layout)?;

        // Start in the |0...0⟩ state.
        let mut data = vec![Complex64::new(0.0, 0.0); size];
        data[0] = Complex64::new(1.0, 0.0);

        let mut instance = Self {
            data,
            num_qubits,
            layout,
            cache_config,
            memory_config,
            access_pattern: Arc::new(Mutex::new(CacheAccessPattern::default())),
            layout_indices,
            inverse_indices,
        };

        instance.apply_layout_transformation()?;

        Ok(instance)
    }

    /// Builds the physical-to-logical index permutation for `layout`, together with its inverse.
    fn generate_layout_indices(
        size: usize,
        layout: CacheOptimizedLayout,
    ) -> Result<(Vec<usize>, Vec<usize>)> {
        let indices = match layout {
            CacheOptimizedLayout::Linear => (0..size).collect::<Vec<usize>>(),
            CacheOptimizedLayout::Blocked => Self::generate_blocked_indices(size)?,
            CacheOptimizedLayout::ZOrder => Self::generate_z_order_indices(size)?,
            CacheOptimizedLayout::Hilbert => Self::generate_hilbert_indices(size)?,
            CacheOptimizedLayout::BitReversal => Self::generate_bit_reversal_indices(size)?,
            CacheOptimizedLayout::Strided => Self::generate_strided_indices(size)?,
            CacheOptimizedLayout::Hierarchical => Self::generate_hierarchical_indices(size)?,
        };

        let mut inverse_indices = vec![0; size];
        for (new_idx, &old_idx) in indices.iter().enumerate() {
            inverse_indices[old_idx] = new_idx;
        }

        Ok((indices, inverse_indices))
    }

    fn generate_blocked_indices(size: usize) -> Result<Vec<usize>> {
        // Number of Complex64 amplitudes that fit in one 64-byte cache line.
        let block_size = 64 / std::mem::size_of::<Complex64>();
        let mut indices = Vec::with_capacity(size);

        for block_start in (0..size).step_by(block_size) {
            let block_end = std::cmp::min(block_start + block_size, size);
            for i in block_start..block_end {
                indices.push(i);
            }
        }

        Ok(indices)
    }

    fn generate_z_order_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Z-order layout requires power-of-2 size".to_string(),
            ));
        }

        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let morton_index = Self::morton_encode_2d(i, bits / 2);
            indices.push(morton_index % size);
        }

        Ok(indices)
    }

    /// Interleaves the low and high halves of `index` as 2D coordinates (Morton encoding).
    fn morton_encode_2d(index: usize, bits_per_dim: usize) -> usize {
        let x = index & ((1 << bits_per_dim) - 1);
        let y = index >> bits_per_dim;

        let mut result = 0;
        for i in 0..bits_per_dim {
            result |= ((x >> i) & 1) << (2 * i);
            result |= ((y >> i) & 1) << (2 * i + 1);
        }

        result
    }

    fn generate_hilbert_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Hilbert layout requires power-of-2 size".to_string(),
            ));
        }

        let order = bits / 2;
        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let (x, y) = Self::hilbert_index_to_xy(i, order);
            let linear_index = y * (1 << order) + x;
            indices.push(linear_index % size);
        }

        Ok(indices)
    }

    /// Converts a position along a Hilbert curve of the given order into (x, y) coordinates.
    fn hilbert_index_to_xy(mut index: usize, order: usize) -> (usize, usize) {
        let mut x = 0;
        let mut y = 0;

        for s in (0..order).rev() {
            let rx = 1 & (index >> 1);
            let ry = 1 & (index ^ rx);

            if ry == 0 {
                if rx == 1 {
                    // Reflect within the current sub-square.
                    let max_val = (1usize << s).saturating_sub(1);
                    x = max_val.saturating_sub(x);
                    y = max_val.saturating_sub(y);
                }
                std::mem::swap(&mut x, &mut y);
            }

            x += rx << s;
            y += ry << s;
            index >>= 2;
        }

        (x, y)
    }

    fn generate_bit_reversal_indices(size: usize) -> Result<Vec<usize>> {
        let bits = (size as f64).log2() as usize;
        if 1 << bits != size {
            return Err(SimulatorError::InvalidParameter(
                "Bit-reversal layout requires power-of-2 size".to_string(),
            ));
        }

        let mut indices = Vec::with_capacity(size);

        for i in 0..size {
            let reversed = Self::reverse_bits(i, bits);
            indices.push(reversed);
        }

        Ok(indices)
    }

    fn reverse_bits(mut num: usize, bits: usize) -> usize {
        let mut result = 0;
        for _ in 0..bits {
            result = (result << 1) | (num & 1);
            num >>= 1;
        }
        result
    }

    fn generate_strided_indices(size: usize) -> Result<Vec<usize>> {
        // Group elements in runs of `stride` consecutive indices.
        let stride = 8;
        let mut indices = Vec::with_capacity(size);

        for group in 0..(size + stride - 1) / stride {
            for offset in 0..stride {
                let index = group * stride + offset;
                if index < size {
                    indices.push(index);
                }
            }
        }

        Ok(indices)
    }

    fn generate_hierarchical_indices(size: usize) -> Result<Vec<usize>> {
        // Inner blocks sized for 32 bytes of amplitudes, grouped into outer 256-byte blocks.
        let l1_block_size = 32 / std::mem::size_of::<Complex64>();
        let l2_block_size = 256 / std::mem::size_of::<Complex64>();
        let mut indices = Vec::with_capacity(size);

        for l2_block in 0..(size + l2_block_size - 1) / l2_block_size {
            let l2_start = l2_block * l2_block_size;
            let l2_end = std::cmp::min(l2_start + l2_block_size, size);

            for l1_block_start in (l2_start..l2_end).step_by(l1_block_size) {
                let l1_block_end = std::cmp::min(l1_block_start + l1_block_size, l2_end);

                for i in l1_block_start..l1_block_end {
                    indices.push(i);
                }
            }
        }

        Ok(indices)
    }

    /// Re-orders `self.data` (assumed to be in logical order) into the current physical layout.
    fn apply_layout_transformation(&mut self) -> Result<()> {
        let original_data = self.data.clone();

        for (new_idx, &old_idx) in self.layout_indices.iter().enumerate() {
            self.data[new_idx] = original_data[old_idx];
        }

        Ok(())
    }

    /// Maps a logical basis-state index to its physical storage index.
    fn logical_to_physical(&self, logical_index: usize) -> usize {
        self.inverse_indices[logical_index]
    }

    /// Maps a physical storage index back to its logical basis-state index.
    fn physical_to_logical(&self, physical_index: usize) -> usize {
        self.layout_indices[physical_index]
    }

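    /// Applies a 2x2 single-qubit gate to `target`, using a traversal specialized for
    /// the current layout and recording cache-access statistics.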
    pub fn apply_single_qubit_gate_cache_optimized(
        &mut self,
        target: usize,
        gate_matrix: &Array2<Complex64>,
    ) -> Result<()> {
        let start_time = Instant::now();
        let mask = 1 << target;

        // Choose the traversal order that matches the physical layout.
        match self.layout {
            CacheOptimizedLayout::Blocked => {
                self.apply_single_qubit_blocked(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::ZOrder => {
                self.apply_single_qubit_z_order(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::Hilbert => {
                self.apply_single_qubit_hilbert(target, gate_matrix, mask)?;
            }
            CacheOptimizedLayout::Strided => {
                self.apply_single_qubit_strided(target, gate_matrix, mask)?;
            }
            _ => {
                self.apply_single_qubit_linear(target, gate_matrix, mask)?;
            }
        }

        self.update_access_statistics(start_time.elapsed());

        Ok(())
    }

    fn apply_single_qubit_blocked(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let block_size = self.cache_config.l1_line_size / std::mem::size_of::<Complex64>();

        for block_start in (0..self.data.len()).step_by(block_size) {
            let block_end = std::cmp::min(block_start + block_size, self.data.len());

            // Prefetch the next block while the current one is being processed.
            if block_end < self.data.len() {
                #[cfg(target_arch = "x86_64")]
                unsafe {
                    std::arch::x86_64::_mm_prefetch(
                        self.data.as_ptr().add(block_end) as *const i8,
                        std::arch::x86_64::_MM_HINT_T0,
                    );
                }
            }

            for i in block_start..block_end {
                // Treat `i` as the |0⟩ half of the pair; skip slots whose target bit is
                // set so each (|0⟩, |1⟩) amplitude pair is updated exactly once.
                let logical_i0 = self.physical_to_logical(i);
                if logical_i0 & mask != 0 {
                    continue;
                }
                let logical_i1 = logical_i0 | mask;
                let physical_i1 = self.logical_to_physical(logical_i1);

                if physical_i1 < self.data.len() {
                    let amp0 = self.data[i];
                    let amp1 = self.data[physical_i1];

                    self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                    self.data[physical_i1] =
                        gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                    self.track_cache_access(i);
                    self.track_cache_access(physical_i1);
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_z_order(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let bits = self.num_qubits;
        // Walk the logical 2^(bits/2) x 2^(bits/2) grid in small tiles.
        let tile_size = 4;

        for tile_y in (0..(1 << (bits / 2))).step_by(tile_size) {
            for tile_x in (0..(1 << (bits / 2))).step_by(tile_size) {
                for y in tile_y..std::cmp::min(tile_y + tile_size, 1 << (bits / 2)) {
                    for x in tile_x..std::cmp::min(tile_x + tile_size, 1 << (bits / 2)) {
                        let logical_index = y * (1 << (bits / 2)) + x;
                        // Only process the member of each pair whose target bit is clear,
                        // so the gate is applied once per pair.
                        if logical_index & mask != 0 {
                            continue;
                        }
                        let logical_i0 = logical_index;
                        let logical_i1 = logical_i0 | mask;

                        let physical_i0 = self.logical_to_physical(logical_i0);
                        let physical_i1 = self.logical_to_physical(logical_i1);

                        if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
                            let amp0 = self.data[physical_i0];
                            let amp1 = self.data[physical_i1];

                            self.data[physical_i0] =
                                gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                            self.data[physical_i1] =
                                gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                            self.track_cache_access(physical_i0);
                            self.track_cache_access(physical_i1);
                        }
                    }
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_hilbert(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        let order = self.num_qubits / 2;
        let curve_length = 1 << self.num_qubits;

        // Traverse amplitudes in Hilbert-curve order, applying the gate once per
        // (|0⟩, |1⟩) pair by skipping indices whose target bit is set.
        for hilbert_index in 0..curve_length {
            let (x, y) = Self::hilbert_index_to_xy(hilbert_index, order);
            let logical_index = y * (1 << order) + x;

            if logical_index >= self.data.len() || logical_index & mask != 0 {
                continue;
            }
            let logical_i0 = logical_index;
            let logical_i1 = logical_i0 | mask;

            let physical_i0 = self.logical_to_physical(logical_i0);
            let physical_i1 = self.logical_to_physical(logical_i1);

            if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
                let amp0 = self.data[physical_i0];
                let amp1 = self.data[physical_i1];

                self.data[physical_i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                self.track_cache_access(physical_i0);
                self.track_cache_access(physical_i1);
            }
        }

        Ok(())
    }

    fn apply_single_qubit_strided(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        // Process amplitudes in groups of `stride` consecutive physical slots.
        let stride = 8;

        for group_start in (0..self.data.len()).step_by(stride) {
            for offset in 0..stride {
                let i = group_start + offset;
                if i >= self.data.len() {
                    break;
                }
                // Treat `i` as the |0⟩ half of the pair; skip slots whose target bit is set.
                let logical_i0 = self.physical_to_logical(i);
                if logical_i0 & mask != 0 {
                    continue;
                }
                let logical_i1 = logical_i0 | mask;
                let physical_i1 = self.logical_to_physical(logical_i1);

                if physical_i1 < self.data.len() {
                    let amp0 = self.data[i];
                    let amp1 = self.data[physical_i1];

                    self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                    self.data[physical_i1] =
                        gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                    self.track_cache_access(i);
                    self.track_cache_access(physical_i1);
                }
            }
        }

        Ok(())
    }

    fn apply_single_qubit_linear(
        &mut self,
        _target: usize,
        gate_matrix: &Array2<Complex64>,
        mask: usize,
    ) -> Result<()> {
        for i in 0..self.data.len() {
            // Treat `i` as the |0⟩ half of the pair; skip slots whose target bit is set
            // so each amplitude pair is updated exactly once.
            let logical_i0 = self.physical_to_logical(i);
            if logical_i0 & mask != 0 {
                continue;
            }
            let logical_i1 = logical_i0 | mask;
            let physical_i1 = self.logical_to_physical(logical_i1);

            if physical_i1 < self.data.len() {
                let amp0 = self.data[i];
                let amp1 = self.data[physical_i1];

                self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;

                self.track_cache_access(i);
                self.track_cache_access(physical_i1);
            }
        }

        Ok(())
    }

    /// Records an access to `physical_index` and updates per-line hit/miss counts.
    fn track_cache_access(&self, physical_index: usize) {
        if let Ok(mut pattern) = self.access_pattern.lock() {
            let cache_line = physical_index
                / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());

            *pattern.line_access_counts.entry(cache_line).or_insert(0) += 1;

            // Keep a bounded window of recent accesses.
            pattern.access_sequence.push_back(physical_index);
            if pattern.access_sequence.len() > 1000 {
                pattern.access_sequence.pop_front();
            }

            // A line seen more than once counts as a hit; the first touch counts as a miss.
            if pattern.line_access_counts.get(&cache_line).unwrap_or(&0) > &1 {
                pattern.cache_hits += 1;
            } else {
                pattern.cache_misses += 1;
            }
        }
    }

    /// Refreshes locality estimates and stride detection from the recent access window.
    fn update_access_statistics(&self, _operation_time: Duration) {
        if let Ok(mut pattern) = self.access_pattern.lock() {
            // Temporal locality: fraction of accesses that hit an already-touched line.
            let total_accesses = pattern.cache_hits + pattern.cache_misses;
            if total_accesses > 0 {
                pattern.temporal_locality = pattern.cache_hits as f64 / total_accesses as f64;
            }

            // Spatial locality: fraction of consecutive accesses that land on the same
            // or an adjacent cache line.
            if pattern.access_sequence.len() > 1 {
                let mut spatial_hits = 0;
                let mut total_pairs = 0;

                // Only the contiguous front slice of the deque is inspected here.
                for window in pattern.access_sequence.as_slices().0.windows(2) {
                    if let [addr1, addr2] = window {
                        let line1 = addr1
                            / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());
                        let line2 = addr2
                            / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());

                        if line1 == line2 || line1.abs_diff(line2) <= 1 {
                            spatial_hits += 1;
                        }
                        total_pairs += 1;
                    }
                }

                if total_pairs > 0 {
                    pattern.spatial_locality = spatial_hits as f64 / total_pairs as f64;
                }
            }

            // Look for constant strides in the most recent accesses.
            if pattern.access_sequence.len() >= 3 {
                let recent_accesses: Vec<_> =
                    pattern.access_sequence.iter().rev().take(10).collect();
                pattern.detected_strides = Self::detect_strides(&recent_accesses);
            }
        }
    }

    /// Returns strides that repeat across consecutive access triples.
    fn detect_strides(accesses: &[&usize]) -> Vec<isize> {
        let mut strides = Vec::new();

        if accesses.len() >= 3 {
            for window in accesses.windows(3) {
                if let [addr1, addr2, addr3] = window {
                    let stride1 = **addr2 as isize - **addr1 as isize;
                    let stride2 = **addr3 as isize - **addr2 as isize;

                    if stride1 == stride2 && stride1 != 0 {
                        strides.push(stride1);
                    }
                }
            }
        }

        strides
    }

    /// Returns a snapshot of the cache performance statistics gathered so far.
    pub fn get_cache_stats(&self) -> Result<CachePerformanceStats> {
        let pattern = self.access_pattern.lock().unwrap();
        let total_accesses = pattern.cache_hits + pattern.cache_misses;

        Ok(CachePerformanceStats {
            cache_hit_rate: if total_accesses > 0 {
                pattern.cache_hits as f64 / total_accesses as f64
            } else {
                0.0
            },
            cache_miss_rate: if total_accesses > 0 {
                pattern.cache_misses as f64 / total_accesses as f64
            } else {
                0.0
            },
            temporal_locality: pattern.temporal_locality,
            spatial_locality: pattern.spatial_locality,
            total_cache_lines_accessed: pattern.line_access_counts.len(),
            average_accesses_per_line: if !pattern.line_access_counts.is_empty() {
                pattern.line_access_counts.values().sum::<u64>() as f64
                    / pattern.line_access_counts.len() as f64
            } else {
                0.0
            },
            detected_strides: pattern.detected_strides.clone(),
            current_layout: self.layout,
        })
    }

    /// Chooses a layout based on the observed access pattern and re-permutes the
    /// amplitudes if the recommendation differs from the current layout.
    pub fn adapt_cache_layout(&mut self) -> Result<CacheLayoutAdaptationResult> {
        let stats = self.get_cache_stats()?;
        let current_performance = stats.cache_hit_rate;

        // Simple heuristic: blocked for high spatial locality, linear for unit strides,
        // strided for many detected strides, space-filling curves when temporal
        // locality is poor.
        let recommended_layout = if stats.spatial_locality > 0.8 {
            CacheOptimizedLayout::Blocked
        } else if stats.detected_strides.iter().any(|&s| s.abs() == 1) {
            CacheOptimizedLayout::Linear
        } else if stats.detected_strides.len() > 2 {
            CacheOptimizedLayout::Strided
        } else if stats.temporal_locality < 0.5 {
            CacheOptimizedLayout::Hilbert
        } else {
            CacheOptimizedLayout::ZOrder
        };

        let layout_changed = recommended_layout != self.layout;

        if layout_changed {
            let old_layout = self.layout;

            // Restore amplitudes to logical order under the old layout before
            // applying the new permutation.
            let old_data = self.data.clone();
            for (physical, &logical) in self.layout_indices.iter().enumerate() {
                self.data[logical] = old_data[physical];
            }

            let (new_indices, new_inverse) =
                Self::generate_layout_indices(self.data.len(), recommended_layout)?;
            self.layout_indices = new_indices;
            self.inverse_indices = new_inverse;
            self.layout = recommended_layout;

            self.apply_layout_transformation()?;

            Ok(CacheLayoutAdaptationResult {
                layout_changed: true,
                old_layout,
                new_layout: recommended_layout,
                performance_before: current_performance,
                expected_performance_improvement: 0.1, // heuristic estimate
                adaptation_overhead: Duration::from_millis(1), // rough placeholder
            })
        } else {
            Ok(CacheLayoutAdaptationResult {
                layout_changed: false,
                old_layout: self.layout,
                new_layout: self.layout,
                performance_before: current_performance,
                expected_performance_improvement: 0.0,
                adaptation_overhead: Duration::from_nanos(0),
            })
        }
    }

    /// Amplitudes in their current physical (layout) order.
    pub fn data(&self) -> &[Complex64] {
        &self.data
    }

    /// Current memory layout.
    pub fn layout(&self) -> CacheOptimizedLayout {
        self.layout
    }

    /// Number of qubits represented by this state vector.
    pub fn num_qubits(&self) -> usize {
        self.num_qubits
    }
}

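/// Snapshot of cache performance metrics derived from the tracked access pattern.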
#[derive(Debug, Clone)]
pub struct CachePerformanceStats {
    /// Fraction of tracked accesses that were hits
    pub cache_hit_rate: f64,
    /// Fraction of tracked accesses that were misses
    pub cache_miss_rate: f64,
    /// Estimated temporal locality in [0, 1]
    pub temporal_locality: f64,
    /// Estimated spatial locality in [0, 1]
    pub spatial_locality: f64,
    /// Number of distinct cache lines touched
    pub total_cache_lines_accessed: usize,
    /// Mean number of accesses per touched cache line
    pub average_accesses_per_line: f64,
    /// Constant strides detected in the recent access sequence
    pub detected_strides: Vec<isize>,
    /// Layout in effect when the snapshot was taken
    pub current_layout: CacheOptimizedLayout,
}

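/// Outcome of an adaptive layout change attempt.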
#[derive(Debug, Clone)]
pub struct CacheLayoutAdaptationResult {
    /// Whether the layout was actually changed
    pub layout_changed: bool,
    /// Layout before adaptation
    pub old_layout: CacheOptimizedLayout,
    /// Layout after adaptation
    pub new_layout: CacheOptimizedLayout,
    /// Cache hit rate measured before adaptation
    pub performance_before: f64,
    /// Heuristic estimate of the expected improvement
    pub expected_performance_improvement: f64,
    /// Estimated cost of performing the adaptation
    pub adaptation_overhead: Duration,
}

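/// Executes gates on a [`CacheOptimizedStateVector`] and records per-gate cache statistics.
///
/// Illustrative usage (a sketch only; `state` and `x_gate` are assumed to be built
/// as in the tests below; the `ignore` fence keeps it out of doctests):
///
/// ```ignore
/// let mut manager = CacheOptimizedGateManager::new(CacheHierarchyConfig::default());
/// let stats = manager.execute_gate(&mut state, "X", &[0], &x_gate).unwrap();
/// println!("cache hit rate: {:.2}", stats.cache_hit_rate);
/// ```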
#[derive(Debug)]
pub struct CacheOptimizedGateManager {
    /// Cache hierarchy description for this manager
    cache_config: CacheHierarchyConfig,
    /// Most recent statistics per gate name
    operation_stats: HashMap<String, CacheOperationStats>,
}

impl CacheOptimizedGateManager {
    /// Creates a manager with the given cache hierarchy description.
    pub fn new(cache_config: CacheHierarchyConfig) -> Self {
        Self {
            cache_config,
            operation_stats: HashMap::new(),
        }
    }

    /// Applies `gate_matrix` to `target_qubits` of `state_vector` and returns the
    /// cache statistics gathered for this operation.
    pub fn execute_gate(
        &mut self,
        state_vector: &mut CacheOptimizedStateVector,
        gate_name: &str,
        target_qubits: &[usize],
        gate_matrix: &Array2<Complex64>,
    ) -> Result<CacheOperationStats> {
        let start_time = Instant::now();

        match target_qubits.len() {
            1 => {
                state_vector
                    .apply_single_qubit_gate_cache_optimized(target_qubits[0], gate_matrix)?;
            }
            2 => {
                return Err(SimulatorError::NotImplemented(
                    "Two-qubit cache-optimized gates not implemented".to_string(),
                ));
            }
            _ => {
                return Err(SimulatorError::InvalidParameter(
                    "Only single and two-qubit gates supported".to_string(),
                ));
            }
        }

        let execution_time = start_time.elapsed();
        let cache_stats = state_vector.get_cache_stats()?;

        let operation_stats = CacheOperationStats {
            gate_name: gate_name.to_string(),
            execution_time,
            cache_hit_rate: cache_stats.cache_hit_rate,
            spatial_locality: cache_stats.spatial_locality,
            temporal_locality: cache_stats.temporal_locality,
            memory_accesses: cache_stats.total_cache_lines_accessed,
        };

        self.operation_stats
            .insert(gate_name.to_string(), operation_stats.clone());

        Ok(operation_stats)
    }

    /// Returns the recorded per-gate statistics keyed by gate name.
    pub fn get_operation_statistics(&self) -> HashMap<String, CacheOperationStats> {
        self.operation_stats.clone()
    }
}

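/// Per-gate cache statistics recorded by [`CacheOptimizedGateManager`].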
#[derive(Debug, Clone)]
pub struct CacheOperationStats {
    /// Name of the executed gate
    pub gate_name: String,
    /// Wall-clock time spent executing the gate
    pub execution_time: Duration,
    /// Cache hit rate observed after the operation
    pub cache_hit_rate: f64,
    /// Spatial locality estimate after the operation
    pub spatial_locality: f64,
    /// Temporal locality estimate after the operation
    pub temporal_locality: f64,
    /// Number of distinct cache lines accessed
    pub memory_accesses: usize,
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array2;

    #[test]
    fn test_cache_optimized_state_vector_creation() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .unwrap();

        assert_eq!(state_vector.num_qubits(), 3);
        assert_eq!(state_vector.data().len(), 8);
        assert_eq!(state_vector.layout(), CacheOptimizedLayout::Blocked);
    }

    #[test]
    fn test_blocked_layout_indices() {
        let indices = CacheOptimizedStateVector::generate_blocked_indices(16).unwrap();
        assert_eq!(indices.len(), 16);

        // The blocked layout must still be a permutation of 0..16.
        let mut sorted_indices = indices.clone();
        sorted_indices.sort();
        assert_eq!(sorted_indices, (0..16).collect::<Vec<_>>());
    }

    #[test]
    fn test_z_order_layout() {
        let indices = CacheOptimizedStateVector::generate_z_order_indices(16).unwrap();
        assert_eq!(indices.len(), 16);
    }

    #[test]
    fn test_hilbert_curve_layout() {
        let indices = CacheOptimizedStateVector::generate_hilbert_indices(16).unwrap();
        assert_eq!(indices.len(), 16);
    }

    #[test]
    fn test_bit_reversal_layout() {
        let indices = CacheOptimizedStateVector::generate_bit_reversal_indices(8).unwrap();
        assert_eq!(indices.len(), 8);

        assert_eq!(indices[1], 4); // 001 reversed -> 100
        assert_eq!(indices[2], 2); // 010 reversed -> 010
        assert_eq!(indices[3], 6); // 011 reversed -> 110
    }

    #[test]
    fn test_single_qubit_gate_application() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .unwrap();

        // Pauli-X gate.
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(0.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(0.0, 0.0),
            ],
        )
        .unwrap();

        state_vector
            .apply_single_qubit_gate_cache_optimized(0, &gate_matrix)
            .unwrap();

        let cache_stats = state_vector.get_cache_stats().unwrap();
        assert!(cache_stats.total_cache_lines_accessed > 0);
    }

    #[test]
    fn test_cache_statistics() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .unwrap();

        let stats = state_vector.get_cache_stats().unwrap();
        assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
        assert!(stats.spatial_locality >= 0.0 && stats.spatial_locality <= 1.0);
        assert!(stats.temporal_locality >= 0.0 && stats.temporal_locality <= 1.0);
    }

    #[test]
    fn test_layout_adaptation() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut state_vector = CacheOptimizedStateVector::new(
            3,
            CacheOptimizedLayout::Linear,
            cache_config,
            memory_config,
        )
        .unwrap();

        let adaptation_result = state_vector.adapt_cache_layout().unwrap();

        assert!(adaptation_result.performance_before >= 0.0);
        assert!(adaptation_result.adaptation_overhead >= Duration::from_nanos(0));
    }

    #[test]
    fn test_cache_optimized_gate_manager() {
        let cache_config = CacheHierarchyConfig::default();
        let memory_config = MemoryOptimizationConfig::default();

        let mut manager = CacheOptimizedGateManager::new(cache_config.clone());
        let mut state_vector = CacheOptimizedStateVector::new(
            2,
            CacheOptimizedLayout::Blocked,
            cache_config,
            memory_config,
        )
        .unwrap();

        // Hadamard gate.
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
                Complex64::new(-1.0 / 2.0_f64.sqrt(), 0.0),
            ],
        )
        .unwrap();

        let stats = manager
            .execute_gate(&mut state_vector, "H", &[0], &gate_matrix)
            .unwrap();

        assert_eq!(stats.gate_name, "H");
        assert!(stats.execution_time > Duration::from_nanos(0));
        assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
    }

    #[test]
    fn test_morton_encoding() {
        // Index 5 splits into (x, y) = (1, 1), which interleaves to 3 (< 16).
        let encoded = CacheOptimizedStateVector::morton_encode_2d(5, 2);
        assert!(encoded < 16);
    }

    #[test]
    fn test_hilbert_coordinate_conversion() {
        let (x, y) = CacheOptimizedStateVector::hilbert_index_to_xy(3, 2);
        // An order-2 curve covers a 4x4 grid.
        assert!(x < 4 && y < 4);
    }

    #[test]
    fn test_stride_detection() {
        let accesses = vec![&0, &4, &8, &12, &16];
        let strides = CacheOptimizedStateVector::detect_strides(&accesses);
        // A constant stride of 4 should be detected.
        assert!(strides.contains(&4));
    }
}