1use scirs2_core::ndarray::Array2;
8use scirs2_core::Complex64;
9#[cfg(target_arch = "x86_64")]
10use std::arch::x86_64::*;
11use std::collections::{HashMap, VecDeque};
12use std::sync::{Arc, Mutex};
13use std::time::{Duration, Instant};
14
15use crate::error::{Result, SimulatorError};
16use crate::memory_bandwidth_optimization::MemoryOptimizationConfig;
17
18#[derive(Debug, Clone)]
20pub struct CacheHierarchyConfig {
21 pub l1_size: usize,
23 pub l1_line_size: usize,
25 pub l1_associativity: usize,
27 pub l2_size: usize,
29 pub l2_line_size: usize,
31 pub l2_associativity: usize,
33 pub l3_size: usize,
35 pub l3_line_size: usize,
37 pub l3_associativity: usize,
39 pub memory_latency: usize,
41 pub replacement_policy: CacheReplacementPolicy,
43}
44
45impl Default for CacheHierarchyConfig {
46 fn default() -> Self {
47 Self {
48 l1_size: 32 * 1024, l1_line_size: 64, l1_associativity: 8, l2_size: 256 * 1024, l2_line_size: 64, l2_associativity: 8, l3_size: 8 * 1024 * 1024, l3_line_size: 64, l3_associativity: 16, memory_latency: 300, replacement_policy: CacheReplacementPolicy::LRU,
59 }
60 }
61}
62
63#[derive(Debug, Clone, Copy, PartialEq, Eq)]
65pub enum CacheReplacementPolicy {
66 LRU,
68 FIFO,
70 Random,
72 LFU,
74}
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq)]
78pub enum CacheOptimizedLayout {
79 Linear,
81 Blocked,
83 ZOrder,
85 Hilbert,
87 BitReversal,
89 Strided,
91 Hierarchical,
93}
94
95#[derive(Debug, Clone)]
97pub struct CacheAccessPattern {
98 pub line_access_counts: HashMap<usize, u64>,
100 pub cache_hits: u64,
102 pub cache_misses: u64,
103 pub access_sequence: VecDeque<usize>,
105 pub detected_strides: Vec<isize>,
107 pub temporal_locality: f64,
109 pub spatial_locality: f64,
111}
112
113impl Default for CacheAccessPattern {
114 fn default() -> Self {
115 Self {
116 line_access_counts: HashMap::new(),
117 cache_hits: 0,
118 cache_misses: 0,
119 access_sequence: VecDeque::new(),
120 detected_strides: Vec::new(),
121 temporal_locality: 0.0,
122 spatial_locality: 0.0,
123 }
124 }
125}
126
127#[derive(Debug)]
129pub struct CacheOptimizedStateVector {
130 data: Vec<Complex64>,
132 num_qubits: usize,
134 layout: CacheOptimizedLayout,
136 cache_config: CacheHierarchyConfig,
138 memory_config: MemoryOptimizationConfig,
140 access_pattern: Arc<Mutex<CacheAccessPattern>>,
142 layout_indices: Vec<usize>,
144 inverse_indices: Vec<usize>,
146}
147
148impl CacheOptimizedStateVector {
149 pub fn new(
151 num_qubits: usize,
152 layout: CacheOptimizedLayout,
153 cache_config: CacheHierarchyConfig,
154 memory_config: MemoryOptimizationConfig,
155 ) -> Result<Self> {
156 let size = 1 << num_qubits;
157
158 let (layout_indices, inverse_indices) = Self::generate_layout_indices(size, layout)?;
160
161 let mut data = vec![Complex64::new(0.0, 0.0); size];
163 data[0] = Complex64::new(1.0, 0.0); let mut instance = Self {
167 data,
168 num_qubits,
169 layout,
170 cache_config,
171 memory_config,
172 access_pattern: Arc::new(Mutex::new(CacheAccessPattern::default())),
173 layout_indices,
174 inverse_indices,
175 };
176
177 instance.apply_layout_transformation()?;
178
179 Ok(instance)
180 }
181
182 fn generate_layout_indices(
184 size: usize,
185 layout: CacheOptimizedLayout,
186 ) -> Result<(Vec<usize>, Vec<usize>)> {
187 let indices = match layout {
188 CacheOptimizedLayout::Linear => (0..size).collect::<Vec<usize>>(),
189 CacheOptimizedLayout::Blocked => Self::generate_blocked_indices(size)?,
190 CacheOptimizedLayout::ZOrder => Self::generate_z_order_indices(size)?,
191 CacheOptimizedLayout::Hilbert => Self::generate_hilbert_indices(size)?,
192 CacheOptimizedLayout::BitReversal => Self::generate_bit_reversal_indices(size)?,
193 CacheOptimizedLayout::Strided => Self::generate_strided_indices(size)?,
194 CacheOptimizedLayout::Hierarchical => Self::generate_hierarchical_indices(size)?,
195 };
196
197 let mut inverse_indices = vec![0; size];
199 for (new_idx, &old_idx) in indices.iter().enumerate() {
200 inverse_indices[old_idx] = new_idx;
201 }
202
203 Ok((indices, inverse_indices))
204 }
205
206 fn generate_blocked_indices(size: usize) -> Result<Vec<usize>> {
208 let block_size = 64 / std::mem::size_of::<Complex64>(); let mut indices = Vec::with_capacity(size);
210
211 for block_start in (0..size).step_by(block_size) {
212 let block_end = std::cmp::min(block_start + block_size, size);
213 for i in block_start..block_end {
214 indices.push(i);
215 }
216 }
217
218 Ok(indices)
219 }
220
221 fn generate_z_order_indices(size: usize) -> Result<Vec<usize>> {
223 let bits = (size as f64).log2() as usize;
224 if 1 << bits != size {
225 return Err(SimulatorError::InvalidParameter(
226 "Z-order layout requires power-of-2 size".to_string(),
227 ));
228 }
229
230 let mut indices = Vec::with_capacity(size);
231
232 for i in 0..size {
233 let morton_index = Self::morton_encode_2d(i, bits / 2);
234 indices.push(morton_index % size);
235 }
236
237 Ok(indices)
238 }
239
240 fn morton_encode_2d(index: usize, bits_per_dim: usize) -> usize {
242 let x = index & ((1 << bits_per_dim) - 1);
243 let y = index >> bits_per_dim;
244
245 let mut result = 0;
246 for i in 0..bits_per_dim {
247 result |= ((x >> i) & 1) << (2 * i);
248 result |= ((y >> i) & 1) << (2 * i + 1);
249 }
250
251 result
252 }
253
254 fn generate_hilbert_indices(size: usize) -> Result<Vec<usize>> {
256 let bits = (size as f64).log2() as usize;
257 if 1 << bits != size {
258 return Err(SimulatorError::InvalidParameter(
259 "Hilbert layout requires power-of-2 size".to_string(),
260 ));
261 }
262
263 let order = bits / 2;
264 let mut indices = Vec::with_capacity(size);
265
266 for i in 0..size {
267 let (x, y) = Self::hilbert_index_to_xy(i, order);
268 let linear_index = y * (1 << order) + x;
269 indices.push(linear_index % size);
270 }
271
272 Ok(indices)
273 }
274
275 fn hilbert_index_to_xy(mut index: usize, order: usize) -> (usize, usize) {
277 let mut x = 0;
278 let mut y = 0;
279
280 for s in (0..order).rev() {
281 let rx = 1 & (index >> 1);
282 let ry = 1 & (index ^ rx);
283
284 if ry == 0 {
285 if rx == 1 {
286 let max_val = (1usize << s).saturating_sub(1);
288 x = max_val.saturating_sub(x);
289 y = max_val.saturating_sub(y);
290 }
291 std::mem::swap(&mut x, &mut y);
292 }
293
294 x += rx << s;
295 y += ry << s;
296 index >>= 2;
297 }
298
299 (x, y)
300 }
301
302 fn generate_bit_reversal_indices(size: usize) -> Result<Vec<usize>> {
304 let bits = (size as f64).log2() as usize;
305 if 1 << bits != size {
306 return Err(SimulatorError::InvalidParameter(
307 "Bit-reversal layout requires power-of-2 size".to_string(),
308 ));
309 }
310
311 let mut indices = Vec::with_capacity(size);
312
313 for i in 0..size {
314 let reversed = Self::reverse_bits(i, bits);
315 indices.push(reversed);
316 }
317
318 Ok(indices)
319 }
320
321 fn reverse_bits(mut num: usize, bits: usize) -> usize {
323 let mut result = 0;
324 for _ in 0..bits {
325 result = (result << 1) | (num & 1);
326 num >>= 1;
327 }
328 result
329 }
330
331 fn generate_strided_indices(size: usize) -> Result<Vec<usize>> {
333 let stride = 8; let mut indices = Vec::with_capacity(size);
335
336 for group in 0..size.div_ceil(stride) {
337 for offset in 0..stride {
338 let index = group * stride + offset;
339 if index < size {
340 indices.push(index);
341 }
342 }
343 }
344
345 Ok(indices)
346 }
347
348 fn generate_hierarchical_indices(size: usize) -> Result<Vec<usize>> {
350 let l1_block_size = 32 / std::mem::size_of::<Complex64>(); let l2_block_size = 256 / std::mem::size_of::<Complex64>(); let mut indices = Vec::with_capacity(size);
354
355 for l2_block in 0..size.div_ceil(l2_block_size) {
356 let l2_start = l2_block * l2_block_size;
357 let l2_end = std::cmp::min(l2_start + l2_block_size, size);
358
359 for l1_block_start in (l2_start..l2_end).step_by(l1_block_size) {
360 let l1_block_end = std::cmp::min(l1_block_start + l1_block_size, l2_end);
361
362 for i in l1_block_start..l1_block_end {
363 indices.push(i);
364 }
365 }
366 }
367
368 Ok(indices)
369 }
370
371 fn apply_layout_transformation(&mut self) -> Result<()> {
373 let original_data = self.data.clone();
374
375 for (new_idx, &old_idx) in self.layout_indices.iter().enumerate() {
376 self.data[new_idx] = original_data[old_idx];
377 }
378
379 Ok(())
380 }
381
382 fn logical_to_physical(&self, logical_index: usize) -> usize {
384 self.inverse_indices[logical_index]
385 }
386
387 fn physical_to_logical(&self, physical_index: usize) -> usize {
389 self.layout_indices[physical_index]
390 }
391
392 pub fn apply_single_qubit_gate_cache_optimized(
394 &mut self,
395 target: usize,
396 gate_matrix: &Array2<Complex64>,
397 ) -> Result<()> {
398 let start_time = Instant::now();
399 let mask = 1 << target;
400
401 match self.layout {
403 CacheOptimizedLayout::Blocked => {
404 self.apply_single_qubit_blocked(target, gate_matrix, mask)?;
405 }
406 CacheOptimizedLayout::ZOrder => {
407 self.apply_single_qubit_z_order(target, gate_matrix, mask)?;
408 }
409 CacheOptimizedLayout::Hilbert => {
410 self.apply_single_qubit_hilbert(target, gate_matrix, mask)?;
411 }
412 CacheOptimizedLayout::Strided => {
413 self.apply_single_qubit_strided(target, gate_matrix, mask)?;
414 }
415 _ => {
416 self.apply_single_qubit_linear(target, gate_matrix, mask)?;
417 }
418 }
419
420 self.update_access_statistics(start_time.elapsed());
422
423 Ok(())
424 }
425
426 pub fn apply_two_qubit_gate_cache_optimized(
433 &mut self,
434 control: usize,
435 target: usize,
436 gate_matrix: &Array2<Complex64>,
437 ) -> Result<()> {
438 let start_time = Instant::now();
439 let n = self.data.len();
440 let mask_c = 1_usize << control;
441 let mask_t = 1_usize << target;
442 let num_qubits = (n as f64).log2() as usize;
443 if control >= num_qubits || target >= num_qubits {
444 return Err(SimulatorError::InvalidParameter(
445 "Qubit index out of range for two-qubit cache-optimized gate".to_string(),
446 ));
447 }
448 for i in 0..n {
450 let li = self.physical_to_logical(i);
451 if (li & mask_c) != 0 || (li & mask_t) != 0 {
452 continue; }
454 let li01 = li | mask_t;
455 let li10 = li | mask_c;
456 let li11 = li | mask_c | mask_t;
457 let pi00 = self.logical_to_physical(li);
458 let pi01 = self.logical_to_physical(li01);
459 let pi10 = self.logical_to_physical(li10);
460 let pi11 = self.logical_to_physical(li11);
461 if pi00 >= n || pi01 >= n || pi10 >= n || pi11 >= n {
462 continue;
463 }
464 let a00 = self.data[pi00];
465 let a01 = self.data[pi01];
466 let a10 = self.data[pi10];
467 let a11 = self.data[pi11];
468 self.data[pi00] = gate_matrix[[0, 0]] * a00
469 + gate_matrix[[0, 1]] * a01
470 + gate_matrix[[0, 2]] * a10
471 + gate_matrix[[0, 3]] * a11;
472 self.data[pi01] = gate_matrix[[1, 0]] * a00
473 + gate_matrix[[1, 1]] * a01
474 + gate_matrix[[1, 2]] * a10
475 + gate_matrix[[1, 3]] * a11;
476 self.data[pi10] = gate_matrix[[2, 0]] * a00
477 + gate_matrix[[2, 1]] * a01
478 + gate_matrix[[2, 2]] * a10
479 + gate_matrix[[2, 3]] * a11;
480 self.data[pi11] = gate_matrix[[3, 0]] * a00
481 + gate_matrix[[3, 1]] * a01
482 + gate_matrix[[3, 2]] * a10
483 + gate_matrix[[3, 3]] * a11;
484 }
485 self.update_access_statistics(start_time.elapsed());
486 Ok(())
487 }
488
489 fn apply_single_qubit_blocked(
491 &mut self,
492 _target: usize,
493 gate_matrix: &Array2<Complex64>,
494 mask: usize,
495 ) -> Result<()> {
496 let block_size = self.cache_config.l1_line_size / std::mem::size_of::<Complex64>();
497
498 for block_start in (0..self.data.len()).step_by(block_size) {
499 let block_end = std::cmp::min(block_start + block_size, self.data.len());
500
501 if block_end < self.data.len() {
503 #[cfg(target_arch = "x86_64")]
504 unsafe {
505 std::arch::x86_64::_mm_prefetch(
506 self.data.as_ptr().add(block_end) as *const i8,
507 std::arch::x86_64::_MM_HINT_T0,
508 );
509 }
510 }
511
512 for i in (block_start..block_end).step_by(2) {
514 if i + 1 < self.data.len() {
515 let logical_i0 = self.physical_to_logical(i);
516 let logical_i1 = logical_i0 | mask;
517 let physical_i1 = self.logical_to_physical(logical_i1);
518
519 if physical_i1 < self.data.len() {
520 let amp0 = self.data[i];
521 let amp1 = self.data[physical_i1];
522
523 self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
524 self.data[physical_i1] =
525 gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
526
527 self.track_cache_access(i);
529 self.track_cache_access(physical_i1);
530 }
531 }
532 }
533 }
534
535 Ok(())
536 }
537
538 fn apply_single_qubit_z_order(
540 &mut self,
541 _target: usize,
542 gate_matrix: &Array2<Complex64>,
543 mask: usize,
544 ) -> Result<()> {
545 let bits = self.num_qubits;
547 let tile_size = 4; for tile_y in (0..(1 << (bits / 2))).step_by(tile_size) {
550 for tile_x in (0..(1 << (bits / 2))).step_by(tile_size) {
551 for y in tile_y..std::cmp::min(tile_y + tile_size, 1 << (bits / 2)) {
552 for x in tile_x..std::cmp::min(tile_x + tile_size, 1 << (bits / 2)) {
553 let logical_index = y * (1 << (bits / 2)) + x;
554 let logical_i0 = logical_index & !mask;
555 let logical_i1 = logical_i0 | mask;
556
557 let physical_i0 = self.logical_to_physical(logical_i0);
558 let physical_i1 = self.logical_to_physical(logical_i1);
559
560 if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
561 let amp0 = self.data[physical_i0];
562 let amp1 = self.data[physical_i1];
563
564 self.data[physical_i0] =
565 gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
566 self.data[physical_i1] =
567 gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
568
569 self.track_cache_access(physical_i0);
570 self.track_cache_access(physical_i1);
571 }
572 }
573 }
574 }
575 }
576
577 Ok(())
578 }
579
580 fn apply_single_qubit_hilbert(
582 &mut self,
583 _target: usize,
584 gate_matrix: &Array2<Complex64>,
585 mask: usize,
586 ) -> Result<()> {
587 let order = self.num_qubits / 2;
589 let curve_length = 1 << self.num_qubits;
590
591 for hilbert_index in (0..curve_length).step_by(2) {
592 let (x, y) = Self::hilbert_index_to_xy(hilbert_index, order);
593 let logical_index = y * (1 << order) + x;
594
595 let logical_i0 = logical_index & !mask;
596 let logical_i1 = logical_i0 | mask;
597
598 let physical_i0 = self.logical_to_physical(logical_i0);
599 let physical_i1 = self.logical_to_physical(logical_i1);
600
601 if physical_i0 < self.data.len() && physical_i1 < self.data.len() {
602 let amp0 = self.data[physical_i0];
603 let amp1 = self.data[physical_i1];
604
605 self.data[physical_i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
606 self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
607
608 self.track_cache_access(physical_i0);
609 self.track_cache_access(physical_i1);
610 }
611 }
612
613 Ok(())
614 }
615
616 fn apply_single_qubit_strided(
618 &mut self,
619 _target: usize,
620 gate_matrix: &Array2<Complex64>,
621 mask: usize,
622 ) -> Result<()> {
623 let stride = 8; for group_start in (0..self.data.len()).step_by(stride * 2) {
626 for offset in 0..stride {
628 let i = group_start + offset;
629 if i + stride < self.data.len() {
630 let logical_i0 = self.physical_to_logical(i);
631 let logical_i1 = logical_i0 | mask;
632 let physical_i1 = self.logical_to_physical(logical_i1);
633
634 if physical_i1 < self.data.len() {
635 let amp0 = self.data[i];
636 let amp1 = self.data[physical_i1];
637
638 self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
639 self.data[physical_i1] =
640 gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
641
642 self.track_cache_access(i);
643 self.track_cache_access(physical_i1);
644 }
645 }
646 }
647 }
648
649 Ok(())
650 }
651
652 fn apply_single_qubit_linear(
654 &mut self,
655 _target: usize,
656 gate_matrix: &Array2<Complex64>,
657 mask: usize,
658 ) -> Result<()> {
659 for i in (0..self.data.len()).step_by(2) {
660 let logical_i0 = self.physical_to_logical(i);
661 let logical_i1 = logical_i0 | mask;
662 let physical_i1 = self.logical_to_physical(logical_i1);
663
664 if physical_i1 < self.data.len() {
665 let amp0 = self.data[i];
666 let amp1 = self.data[physical_i1];
667
668 self.data[i] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
669 self.data[physical_i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
670
671 self.track_cache_access(i);
672 self.track_cache_access(physical_i1);
673 }
674 }
675
676 Ok(())
677 }
678
679 fn track_cache_access(&self, physical_index: usize) {
681 if let Ok(mut pattern) = self.access_pattern.lock() {
682 let cache_line = physical_index
683 / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());
684
685 *pattern.line_access_counts.entry(cache_line).or_insert(0) += 1;
687
688 pattern.access_sequence.push_back(physical_index);
690 if pattern.access_sequence.len() > 1000 {
691 pattern.access_sequence.pop_front();
692 }
693
694 if pattern.line_access_counts.get(&cache_line).unwrap_or(&0) > &1 {
696 pattern.cache_hits += 1;
697 } else {
698 pattern.cache_misses += 1;
699 }
700 }
701 }
702
703 fn update_access_statistics(&self, operation_time: Duration) {
705 if let Ok(mut pattern) = self.access_pattern.lock() {
706 let total_accesses = pattern.cache_hits + pattern.cache_misses;
708 if total_accesses > 0 {
709 pattern.temporal_locality = pattern.cache_hits as f64 / total_accesses as f64;
710 }
711
712 if pattern.access_sequence.len() > 1 {
714 let mut spatial_hits = 0;
715 let mut total_pairs = 0;
716
717 for window in pattern.access_sequence.as_slices().0.windows(2) {
718 if let [addr1, addr2] = window {
719 let line1 = addr1
720 / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());
721 let line2 = addr2
722 / (self.cache_config.l1_line_size / std::mem::size_of::<Complex64>());
723
724 if line1 == line2 || line1.abs_diff(line2) <= 1 {
725 spatial_hits += 1;
726 }
727 total_pairs += 1;
728 }
729 }
730
731 if total_pairs > 0 {
732 pattern.spatial_locality = f64::from(spatial_hits) / f64::from(total_pairs);
733 }
734 }
735
736 if pattern.access_sequence.len() >= 3 {
738 let recent_accesses: Vec<_> =
739 pattern.access_sequence.iter().rev().take(10).collect();
740 pattern.detected_strides = Self::detect_strides(&recent_accesses);
741 }
742 }
743 }
744
745 fn detect_strides(accesses: &[&usize]) -> Vec<isize> {
747 let mut strides = Vec::new();
748
749 if accesses.len() >= 3 {
750 for window in accesses.windows(3) {
751 if let [addr1, addr2, addr3] = window {
752 let stride1 = **addr2 as isize - **addr1 as isize;
753 let stride2 = **addr3 as isize - **addr2 as isize;
754
755 if stride1 == stride2 && stride1 != 0 {
756 strides.push(stride1);
757 }
758 }
759 }
760 }
761
762 strides
763 }
764
765 pub fn get_cache_stats(&self) -> Result<CachePerformanceStats> {
767 let pattern = self.access_pattern.lock().map_err(|e| {
768 SimulatorError::InvalidOperation(format!("Failed to acquire access pattern lock: {e}"))
769 })?;
770 let total_accesses = pattern.cache_hits + pattern.cache_misses;
771
772 Ok(CachePerformanceStats {
773 cache_hit_rate: if total_accesses > 0 {
774 pattern.cache_hits as f64 / total_accesses as f64
775 } else {
776 0.0
777 },
778 cache_miss_rate: if total_accesses > 0 {
779 pattern.cache_misses as f64 / total_accesses as f64
780 } else {
781 0.0
782 },
783 temporal_locality: pattern.temporal_locality,
784 spatial_locality: pattern.spatial_locality,
785 total_cache_lines_accessed: pattern.line_access_counts.len(),
786 average_accesses_per_line: if pattern.line_access_counts.is_empty() {
787 0.0
788 } else {
789 pattern.line_access_counts.values().sum::<u64>() as f64
790 / pattern.line_access_counts.len() as f64
791 },
792 detected_strides: pattern.detected_strides.clone(),
793 current_layout: self.layout,
794 })
795 }
796
797 pub fn adapt_cache_layout(&mut self) -> Result<CacheLayoutAdaptationResult> {
799 let stats = self.get_cache_stats()?;
800 let current_performance = stats.cache_hit_rate;
801
802 let recommended_layout = if stats.spatial_locality > 0.8 {
804 CacheOptimizedLayout::Blocked
805 } else if stats.detected_strides.iter().any(|&s| s.abs() == 1) {
806 CacheOptimizedLayout::Linear
807 } else if stats.detected_strides.len() > 2 {
808 CacheOptimizedLayout::Strided
809 } else if stats.temporal_locality < 0.5 {
810 CacheOptimizedLayout::Hilbert
811 } else {
812 CacheOptimizedLayout::ZOrder
813 };
814
815 let layout_changed = recommended_layout != self.layout;
816
817 if layout_changed {
818 let old_layout = self.layout;
820
821 let (new_indices, new_inverse) =
823 Self::generate_layout_indices(self.data.len(), recommended_layout)?;
824 self.layout_indices = new_indices;
825 self.inverse_indices = new_inverse;
826 self.layout = recommended_layout;
827
828 self.apply_layout_transformation()?;
830
831 Ok(CacheLayoutAdaptationResult {
832 layout_changed: true,
833 old_layout,
834 new_layout: recommended_layout,
835 performance_before: current_performance,
836 expected_performance_improvement: 0.1, adaptation_overhead: Duration::from_millis(1), })
839 } else {
840 Ok(CacheLayoutAdaptationResult {
841 layout_changed: false,
842 old_layout: self.layout,
843 new_layout: self.layout,
844 performance_before: current_performance,
845 expected_performance_improvement: 0.0,
846 adaptation_overhead: Duration::from_nanos(0),
847 })
848 }
849 }
850
851 #[must_use]
853 pub fn data(&self) -> &[Complex64] {
854 &self.data
855 }
856
857 #[must_use]
859 pub const fn layout(&self) -> CacheOptimizedLayout {
860 self.layout
861 }
862
863 #[must_use]
865 pub const fn num_qubits(&self) -> usize {
866 self.num_qubits
867 }
868}
869
870#[derive(Debug, Clone)]
872pub struct CachePerformanceStats {
873 pub cache_hit_rate: f64,
875 pub cache_miss_rate: f64,
877 pub temporal_locality: f64,
879 pub spatial_locality: f64,
881 pub total_cache_lines_accessed: usize,
883 pub average_accesses_per_line: f64,
885 pub detected_strides: Vec<isize>,
887 pub current_layout: CacheOptimizedLayout,
889}
890
891#[derive(Debug, Clone)]
893pub struct CacheLayoutAdaptationResult {
894 pub layout_changed: bool,
896 pub old_layout: CacheOptimizedLayout,
898 pub new_layout: CacheOptimizedLayout,
900 pub performance_before: f64,
902 pub expected_performance_improvement: f64,
904 pub adaptation_overhead: Duration,
906}
907
908#[derive(Debug)]
910pub struct CacheOptimizedGateManager {
911 cache_config: CacheHierarchyConfig,
913 operation_stats: HashMap<String, CacheOperationStats>,
915}
916
917impl CacheOptimizedGateManager {
918 #[must_use]
920 pub fn new(cache_config: CacheHierarchyConfig) -> Self {
921 Self {
922 cache_config,
923 operation_stats: HashMap::new(),
924 }
925 }
926
927 pub fn execute_gate(
929 &mut self,
930 state_vector: &mut CacheOptimizedStateVector,
931 gate_name: &str,
932 target_qubits: &[usize],
933 gate_matrix: &Array2<Complex64>,
934 ) -> Result<CacheOperationStats> {
935 let start_time = Instant::now();
936
937 match target_qubits.len() {
938 1 => {
939 state_vector
940 .apply_single_qubit_gate_cache_optimized(target_qubits[0], gate_matrix)?;
941 }
942 2 => {
943 state_vector.apply_two_qubit_gate_cache_optimized(
944 target_qubits[0],
945 target_qubits[1],
946 gate_matrix,
947 )?;
948 }
949 _ => {
950 return Err(SimulatorError::InvalidParameter(
951 "Only single and two-qubit gates supported".to_string(),
952 ));
953 }
954 }
955
956 let execution_time = start_time.elapsed();
957 let cache_stats = state_vector.get_cache_stats()?;
958
959 let operation_stats = CacheOperationStats {
960 gate_name: gate_name.to_string(),
961 execution_time,
962 cache_hit_rate: cache_stats.cache_hit_rate,
963 spatial_locality: cache_stats.spatial_locality,
964 temporal_locality: cache_stats.temporal_locality,
965 memory_accesses: cache_stats.total_cache_lines_accessed,
966 };
967
968 self.operation_stats
969 .insert(gate_name.to_string(), operation_stats.clone());
970
971 Ok(operation_stats)
972 }
973
974 #[must_use]
976 pub fn get_operation_statistics(&self) -> HashMap<String, CacheOperationStats> {
977 self.operation_stats.clone()
978 }
979}
980
981#[derive(Debug, Clone)]
983pub struct CacheOperationStats {
984 pub gate_name: String,
986 pub execution_time: Duration,
988 pub cache_hit_rate: f64,
990 pub spatial_locality: f64,
992 pub temporal_locality: f64,
994 pub memory_accesses: usize,
996}
997
998#[cfg(test)]
999mod tests {
1000 use super::*;
1001 use scirs2_core::ndarray::Array2;
1002
1003 #[test]
1004 fn test_cache_optimized_state_vector_creation() {
1005 let cache_config = CacheHierarchyConfig::default();
1006 let memory_config = MemoryOptimizationConfig::default();
1007
1008 let state_vector = CacheOptimizedStateVector::new(
1009 3,
1010 CacheOptimizedLayout::Blocked,
1011 cache_config,
1012 memory_config,
1013 )
1014 .expect("Failed to create cache-optimized state vector");
1015
1016 assert_eq!(state_vector.num_qubits(), 3);
1017 assert_eq!(state_vector.data().len(), 8);
1018 assert_eq!(state_vector.layout(), CacheOptimizedLayout::Blocked);
1019 }
1020
1021 #[test]
1022 fn test_blocked_layout_indices() {
1023 let indices = CacheOptimizedStateVector::generate_blocked_indices(16)
1024 .expect("Failed to generate blocked indices");
1025 assert_eq!(indices.len(), 16);
1026
1027 let mut sorted_indices = indices;
1029 sorted_indices.sort_unstable();
1030 assert_eq!(sorted_indices, (0..16).collect::<Vec<_>>());
1031 }
1032
1033 #[test]
1034 fn test_z_order_layout() {
1035 let indices = CacheOptimizedStateVector::generate_z_order_indices(16)
1036 .expect("Failed to generate Z-order indices");
1037 assert_eq!(indices.len(), 16);
1038 }
1039
1040 #[test]
1041 fn test_hilbert_curve_layout() {
1042 let indices = CacheOptimizedStateVector::generate_hilbert_indices(16)
1043 .expect("Failed to generate Hilbert curve indices");
1044 assert_eq!(indices.len(), 16);
1045 }
1046
1047 #[test]
1048 fn test_bit_reversal_layout() {
1049 let indices = CacheOptimizedStateVector::generate_bit_reversal_indices(8)
1050 .expect("Failed to generate bit reversal indices");
1051 assert_eq!(indices.len(), 8);
1052
1053 assert_eq!(indices[1], 4); assert_eq!(indices[2], 2); assert_eq!(indices[3], 6); }
1058
1059 #[test]
1060 fn test_single_qubit_gate_application() {
1061 let cache_config = CacheHierarchyConfig::default();
1062 let memory_config = MemoryOptimizationConfig::default();
1063
1064 let mut state_vector = CacheOptimizedStateVector::new(
1065 2,
1066 CacheOptimizedLayout::Linear,
1067 cache_config,
1068 memory_config,
1069 )
1070 .expect("Failed to create state vector");
1071
1072 let gate_matrix = Array2::from_shape_vec(
1074 (2, 2),
1075 vec![
1076 Complex64::new(0.0, 0.0),
1077 Complex64::new(1.0, 0.0),
1078 Complex64::new(1.0, 0.0),
1079 Complex64::new(0.0, 0.0),
1080 ],
1081 )
1082 .expect("Failed to create gate matrix");
1083
1084 state_vector
1085 .apply_single_qubit_gate_cache_optimized(0, &gate_matrix)
1086 .expect("Failed to apply single qubit gate");
1087
1088 let cache_stats = state_vector
1090 .get_cache_stats()
1091 .expect("Failed to get cache stats");
1092 assert!(cache_stats.total_cache_lines_accessed > 0);
1093 }
1094
1095 #[test]
1096 fn test_cache_statistics() {
1097 let cache_config = CacheHierarchyConfig::default();
1098 let memory_config = MemoryOptimizationConfig::default();
1099
1100 let state_vector = CacheOptimizedStateVector::new(
1101 3,
1102 CacheOptimizedLayout::Blocked,
1103 cache_config,
1104 memory_config,
1105 )
1106 .expect("Failed to create state vector");
1107
1108 let stats = state_vector
1109 .get_cache_stats()
1110 .expect("Failed to get cache stats");
1111 assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
1112 assert!(stats.spatial_locality >= 0.0 && stats.spatial_locality <= 1.0);
1113 assert!(stats.temporal_locality >= 0.0 && stats.temporal_locality <= 1.0);
1114 }
1115
1116 #[test]
1117 fn test_layout_adaptation() {
1118 let cache_config = CacheHierarchyConfig::default();
1119 let memory_config = MemoryOptimizationConfig::default();
1120
1121 let mut state_vector = CacheOptimizedStateVector::new(
1122 3,
1123 CacheOptimizedLayout::Linear,
1124 cache_config,
1125 memory_config,
1126 )
1127 .expect("Failed to create state vector");
1128
1129 let adaptation_result = state_vector
1130 .adapt_cache_layout()
1131 .expect("Failed to adapt cache layout");
1132
1133 assert!(adaptation_result.performance_before >= 0.0);
1135 assert!(adaptation_result.adaptation_overhead >= Duration::from_nanos(0));
1136 }
1137
1138 #[test]
1139 fn test_cache_optimized_gate_manager() {
1140 let cache_config = CacheHierarchyConfig::default();
1141 let memory_config = MemoryOptimizationConfig::default();
1142
1143 let mut manager = CacheOptimizedGateManager::new(cache_config.clone());
1144 let mut state_vector = CacheOptimizedStateVector::new(
1145 2,
1146 CacheOptimizedLayout::Blocked,
1147 cache_config,
1148 memory_config,
1149 )
1150 .expect("Failed to create state vector");
1151
1152 let gate_matrix = Array2::from_shape_vec(
1154 (2, 2),
1155 vec![
1156 Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
1157 Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
1158 Complex64::new(1.0 / 2.0_f64.sqrt(), 0.0),
1159 Complex64::new(-1.0 / 2.0_f64.sqrt(), 0.0),
1160 ],
1161 )
1162 .expect("Failed to create gate matrix");
1163
1164 let stats = manager
1165 .execute_gate(&mut state_vector, "H", &[0], &gate_matrix)
1166 .expect("Failed to execute gate");
1167
1168 assert_eq!(stats.gate_name, "H");
1169 assert!(stats.execution_time > Duration::from_nanos(0));
1170 assert!(stats.cache_hit_rate >= 0.0 && stats.cache_hit_rate <= 1.0);
1171 }
1172
1173 #[test]
1174 fn test_morton_encoding() {
1175 let encoded = CacheOptimizedStateVector::morton_encode_2d(5, 2); assert!(encoded < 16); }
1178
1179 #[test]
1180 fn test_hilbert_coordinate_conversion() {
1181 let (x, y) = CacheOptimizedStateVector::hilbert_index_to_xy(3, 2);
1182 assert!(x < 4 && y < 4); }
1184
1185 #[test]
1186 fn test_stride_detection() {
1187 let accesses = vec![&0, &4, &8, &12, &16];
1188 let strides = CacheOptimizedStateVector::detect_strides(&accesses);
1189 assert!(strides.contains(&4)); }
1191}