use scirs2_core::ndarray::Array2;
use scirs2_core::parallel_ops::{IndexedParallelIterator, ParallelIterator};
use scirs2_core::Complex64;
use std::alloc::{GlobalAlloc, Layout, System};
use std::collections::{HashMap, VecDeque};
use std::ptr::NonNull;
use std::sync::{Arc, Mutex, RwLock};
use std::time::{Duration, Instant};
use crate::error::{Result, SimulatorError};
use crate::scirs2_integration::SciRS2Backend;
/// Strategy used to lay out the state-vector amplitudes in memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryLayout {
    /// Plain contiguous allocation with no padding or reordering.
    Contiguous,
    /// Length padded up to a whole number of cache lines.
    CacheAligned,
    /// Intended block-major organization; traversal is blocked in the gate kernels.
    Blocked,
    /// Intended NUMA-interleaved placement (not implemented beyond plain allocation — TODO confirm).
    Interleaved,
    /// Intended cache-hierarchy-aware placement (not implemented beyond plain allocation — TODO confirm).
    Hierarchical,
    /// Starts cache-aligned and may be re-laid-out at runtime by `adapt_memory_layout`.
    Adaptive,
}
/// Tuning knobs for state-vector memory layout, software prefetching,
/// pooling, and bandwidth monitoring.
#[derive(Debug, Clone)]
pub struct MemoryOptimizationConfig {
    /// Layout strategy used when allocating the state vector.
    pub layout: MemoryLayout,
    /// Cache line size in bytes; drives alignment/padding arithmetic.
    pub cache_line_size: usize,
    /// L1 data cache size in bytes.
    pub l1_cache_size: usize,
    /// L2 cache size in bytes.
    pub l2_cache_size: usize,
    /// L3 cache size in bytes.
    pub l3_cache_size: usize,
    /// Block size in bytes for the `Blocked` layout and blocked gate kernels.
    pub block_size: usize,
    /// Whether gate kernels issue software prefetch hints.
    pub enable_prefetching: bool,
    /// Prefetch look-ahead distance (not read by the kernels in this module — TODO confirm intended use).
    pub prefetch_distance: usize,
    /// NUMA optimization flag (not acted upon in this module — TODO confirm).
    pub enable_numa_optimization: bool,
    /// Total memory-pool budget in bytes (divided into 1024 blocks).
    pub memory_pool_size: usize,
    /// Bandwidth-monitoring flag (samples are currently recorded regardless — TODO confirm).
    pub enable_bandwidth_monitoring: bool,
    /// Threshold for adaptive-layout decisions (not read in this module — TODO confirm).
    pub adaptation_threshold: f64,
}
impl Default for MemoryOptimizationConfig {
    /// Defaults sized for a typical x86-64 desktop cache hierarchy.
    fn default() -> Self {
        Self {
            layout: MemoryLayout::Adaptive,
            cache_line_size: 64,                  // common x86-64 line size
            l1_cache_size: 32 * 1024,             // 32 KiB
            l2_cache_size: 256 * 1024,            // 256 KiB
            l3_cache_size: 8 * 1024 * 1024,       // 8 MiB
            block_size: 4096,                     // one page worth of amplitudes
            enable_prefetching: true,
            prefetch_distance: 4,
            enable_numa_optimization: true,
            memory_pool_size: 1024 * 1024 * 1024, // 1 GiB budget
            enable_bandwidth_monitoring: true,
            adaptation_threshold: 0.1,
        }
    }
}
/// Running record of how the state vector has been accessed; feeds the
/// adaptive-layout heuristics in `adapt_memory_layout`.
#[derive(Debug, Clone)]
pub struct MemoryAccessPattern {
    /// Per-index access counts.
    pub access_frequency: HashMap<usize, u64>,
    /// Recorded `(start, length)` runs classified as sequential.
    pub sequential_accesses: VecDeque<(usize, usize)>,
    /// Indices of accesses classified as random.
    pub random_accesses: VecDeque<usize>,
    /// Number of accesses counted as cache misses.
    pub cache_misses: u64,
    /// Total number of recorded accesses.
    pub total_accesses: u64,
    /// Timestamp of the most recent recorded access.
    pub last_access_time: Instant,
}
impl Default for MemoryAccessPattern {
    /// Empty access history anchored at the current instant.
    fn default() -> Self {
        Self {
            access_frequency: HashMap::default(),
            sequential_accesses: VecDeque::default(),
            random_accesses: VecDeque::default(),
            cache_misses: 0,
            total_accesses: 0,
            last_access_time: Instant::now(),
        }
    }
}
/// Rolling statistics of effective memory bandwidth observed during gate
/// application.
#[derive(Debug, Clone)]
pub struct BandwidthMonitor {
    /// Recent `(timestamp, bytes-per-second)` samples (bounded window).
    pub bandwidth_samples: VecDeque<(Instant, f64)>,
    /// Most recent bandwidth divided by an assumed theoretical peak.
    pub current_utilization: f64,
    /// Highest bandwidth sample observed so far.
    pub peak_bandwidth: f64,
    /// Mean of the samples currently in the window.
    pub average_bandwidth: f64,
    /// Latency samples (not populated in this module — TODO confirm intended use).
    pub latency_samples: VecDeque<Duration>,
}
impl Default for BandwidthMonitor {
    /// All-zero statistics with empty sample windows.
    fn default() -> Self {
        Self {
            bandwidth_samples: VecDeque::default(),
            current_utilization: 0.0,
            peak_bandwidth: 0.0,
            average_bandwidth: 0.0,
            latency_samples: VecDeque::default(),
        }
    }
}
/// Free-list pool of fixed-size, 64-byte-aligned raw blocks backed by the
/// global `System` allocator.
#[derive(Debug)]
pub struct MemoryPool {
    // Freed (pointer, size) blocks available for reuse.
    blocks: Mutex<Vec<(*mut u8, usize)>>,
    // Size in bytes of every block handed out by this pool.
    block_size: usize,
    // Upper bound on blocks retained in `blocks`; beyond it, frees go
    // straight back to the system allocator.
    max_blocks: usize,
    // Number of blocks currently obtained from the system allocator.
    allocated_count: Mutex<usize>,
}
impl MemoryPool {
    /// Creates an empty pool of `block_size`-byte blocks, retaining at most
    /// `max_blocks` freed blocks for reuse.
    ///
    /// # Errors
    /// Currently infallible; the `Result` return is kept for interface
    /// stability with callers that propagate it with `?`.
    pub const fn new(block_size: usize, max_blocks: usize) -> Result<Self> {
        Ok(Self {
            blocks: Mutex::new(Vec::new()),
            block_size,
            max_blocks,
            allocated_count: Mutex::new(0),
        })
    }

    /// Hands out one block, reusing a pooled block when available and
    /// otherwise performing a fresh 64-byte-aligned `System` allocation.
    ///
    /// Reused blocks contain stale bytes; callers must initialize the
    /// memory before reading it.
    ///
    /// # Errors
    /// Fails if a lock is poisoned, the layout is invalid, or the system
    /// allocator returns null.
    pub fn allocate(&self) -> Result<NonNull<u8>> {
        let mut blocks = self
            .blocks
            .lock()
            .map_err(|e| SimulatorError::MemoryAllocationFailed(format!("Lock poisoned: {e}")))?;
        if let Some((ptr, _)) = blocks.pop() {
            // SAFETY: every pointer stored in `blocks` came from a
            // successful `System.alloc` call and is therefore non-null.
            return Ok(unsafe { NonNull::new_unchecked(ptr) });
        }
        let layout = Layout::from_size_align(self.block_size, 64)
            .map_err(|e| SimulatorError::MemoryAllocationFailed(e.to_string()))?;
        // SAFETY: `layout` was validated above. NOTE(review): a zero-sized
        // `block_size` would make this call undefined behavior per the
        // `GlobalAlloc` contract — confirm callers never pass 0.
        let ptr = unsafe { System.alloc(layout) };
        if ptr.is_null() {
            return Err(SimulatorError::MemoryAllocationFailed(
                "Failed to allocate memory block".to_string(),
            ));
        }
        let mut count = self
            .allocated_count
            .lock()
            .map_err(|e| SimulatorError::MemoryAllocationFailed(format!("Lock poisoned: {e}")))?;
        *count += 1;
        // SAFETY: `ptr` was just checked to be non-null.
        Ok(unsafe { NonNull::new_unchecked(ptr) })
    }

    /// Returns a block to the pool, or frees it to the system allocator
    /// when the pool already holds `max_blocks` blocks.
    ///
    /// # Errors
    /// Fails if a lock is poisoned or the layout is invalid.
    pub fn deallocate(&self, ptr: NonNull<u8>) -> Result<()> {
        let mut blocks = self
            .blocks
            .lock()
            .map_err(|e| SimulatorError::MemoryAllocationFailed(format!("Lock poisoned: {e}")))?;
        if blocks.len() < self.max_blocks {
            // Keep the block for reuse; `allocated_count` is unchanged
            // because the block is still owned by the pool.
            blocks.push((ptr.as_ptr(), self.block_size));
        } else {
            let layout = Layout::from_size_align(self.block_size, 64)
                .map_err(|e| SimulatorError::MemoryAllocationFailed(e.to_string()))?;
            // SAFETY: `ptr` was allocated by `System.alloc` with this exact
            // layout in `allocate`.
            unsafe { System.dealloc(ptr.as_ptr(), layout) };
            let mut count = self
                .allocated_count
                .lock()
                .map_err(|e| SimulatorError::MemoryAllocationFailed(format!("Lock poisoned: {e}")))?;
            *count -= 1;
        }
        Ok(())
    }
}

impl Drop for MemoryPool {
    /// Frees every block still cached in the free list. Without this the
    /// pooled blocks would leak when the pool is dropped.
    fn drop(&mut self) {
        let Ok(layout) = Layout::from_size_align(self.block_size, 64) else {
            return;
        };
        if let Ok(blocks) = self.blocks.get_mut() {
            for (ptr, _) in blocks.drain(..) {
                // SAFETY: every pointer in `blocks` was produced by
                // `System.alloc` with this exact layout.
                unsafe { System.dealloc(ptr, layout) };
            }
        }
    }
}
// SAFETY: `MemoryPool` owns raw pointers obtained from the global allocator
// (not tied to any thread), and all shared internal state (`blocks`,
// `allocated_count`) is guarded by `Mutex`, so moving the pool between
// threads and sharing references across threads is sound.
unsafe impl Send for MemoryPool {}
unsafe impl Sync for MemoryPool {}
/// Quantum state vector whose storage layout and traversal order are tuned
/// for memory bandwidth.
#[derive(Debug)]
pub struct OptimizedStateVector {
    // Amplitudes; length is 2^num_qubits, possibly padded by the layout.
    data: Vec<Complex64>,
    // Number of qubits represented.
    num_qubits: usize,
    // Layout currently in effect for `data`.
    layout: MemoryLayout,
    // Block size in bytes used by the blocked gate kernels.
    block_size: usize,
    // Shared access-pattern statistics driving adaptive decisions.
    access_pattern: Arc<RwLock<MemoryAccessPattern>>,
    // Shared bandwidth statistics updated after each gate.
    bandwidth_monitor: Arc<RwLock<BandwidthMonitor>>,
    // Pool for auxiliary allocations (not used by the kernels here — TODO confirm intended use).
    memory_pool: Arc<MemoryPool>,
    // Configuration snapshot taken at construction.
    config: MemoryOptimizationConfig,
}
impl OptimizedStateVector {
/// Builds a `num_qubits`-qubit state vector in the configured layout,
/// initialized to the computational basis state |0...0⟩.
///
/// # Errors
/// Propagates allocation failures from the pool or layout allocators.
pub fn new(num_qubits: usize, config: MemoryOptimizationConfig) -> Result<Self> {
    let size = 1usize << num_qubits;
    // The pool budget is split into 1024 equally sized blocks.
    let pool = MemoryPool::new(config.memory_pool_size / 1024, 1024)?;
    let mut data = Self::allocate_with_layout(size, config.layout, &config)?;
    // Amplitude of |0...0⟩ is 1; everything else stays zero.
    data[0] = Complex64::new(1.0, 0.0);
    Ok(Self {
        data,
        num_qubits,
        layout: config.layout,
        block_size: config.block_size,
        access_pattern: Arc::new(RwLock::new(MemoryAccessPattern::default())),
        bandwidth_monitor: Arc::new(RwLock::new(BandwidthMonitor::default())),
        memory_pool: Arc::new(pool),
        config,
    })
}
/// Dispatches to the allocator matching the requested layout. `Adaptive`
/// starts out cache-aligned and may be re-laid-out later at runtime.
fn allocate_with_layout(
    size: usize,
    layout: MemoryLayout,
    config: &MemoryOptimizationConfig,
) -> Result<Vec<Complex64>> {
    match layout {
        MemoryLayout::Contiguous => Ok(vec![Complex64::new(0.0, 0.0); size]),
        MemoryLayout::CacheAligned | MemoryLayout::Adaptive => {
            Self::allocate_cache_aligned(size, config)
        }
        MemoryLayout::Blocked => Self::allocate_blocked(size, config),
        MemoryLayout::Interleaved => Self::allocate_interleaved(size, config),
        MemoryLayout::Hierarchical => Self::allocate_hierarchical(size, config),
    }
}
/// Allocates a zeroed buffer whose length is padded up to a whole number
/// of cache lines.
///
/// # Errors
/// Currently infallible; `Result` kept for interface consistency.
fn allocate_cache_aligned(
    size: usize,
    config: &MemoryOptimizationConfig,
) -> Result<Vec<Complex64>> {
    let element_size = std::mem::size_of::<Complex64>();
    // `.max(1)` guards against a configured cache line smaller than one
    // element, which would make `div_ceil` divide by zero.
    let elements_per_line = (config.cache_line_size / element_size).max(1);
    let padded_size = size.div_ceil(elements_per_line) * elements_per_line;
    // Single zero-fill to the padded length (the original resized twice,
    // first to `size` and then to `padded_size`, doing redundant work).
    Ok(vec![Complex64::new(0.0, 0.0); padded_size])
}
/// Allocates a zeroed buffer for the `Blocked` layout.
///
/// The original implementation zero-filled a buffer and then copied it
/// block-by-block *in the same sequential order*, producing an exact copy
/// of the all-zero source — the "blocked" permutation was the identity and
/// cost an extra allocation plus an O(n) copy. Blocked *traversal* is
/// performed by the gate kernels, not by the allocation.
fn allocate_blocked(size: usize, _config: &MemoryOptimizationConfig) -> Result<Vec<Complex64>> {
    Ok(vec![Complex64::new(0.0, 0.0); size])
}
/// Allocates a zeroed buffer for the `Interleaved` layout. NUMA
/// interleaving is not implemented here; this is a plain allocation.
fn allocate_interleaved(
    size: usize,
    _config: &MemoryOptimizationConfig,
) -> Result<Vec<Complex64>> {
    Ok(vec![Complex64::new(0.0, 0.0); size])
}
/// Allocates a zeroed buffer for the `Hierarchical` layout.
///
/// The original computed L1/L2 element counts into locals that were never
/// used (dead code producing compiler warnings); a genuine
/// cache-hierarchy-aware placement is not implemented yet, so this is a
/// plain zero-initialized allocation.
fn allocate_hierarchical(
    size: usize,
    _config: &MemoryOptimizationConfig,
) -> Result<Vec<Complex64>> {
    Ok(vec![Complex64::new(0.0, 0.0); size])
}
/// Applies a 2x2 `gate_matrix` to qubit `target`, choosing the traversal
/// that matches the current memory layout, and records a bandwidth sample.
///
/// # Errors
/// Propagates errors from the selected kernel.
pub fn apply_single_qubit_gate_optimized(
    &mut self,
    target: usize,
    gate_matrix: &Array2<Complex64>,
) -> Result<()> {
    let started = Instant::now();
    let mask = 1usize << target;
    let bytes_touched = self.data.len() * std::mem::size_of::<Complex64>();
    match self.layout {
        MemoryLayout::Blocked => {
            self.apply_single_qubit_gate_blocked(target, gate_matrix, mask)?;
        }
        MemoryLayout::CacheAligned => {
            self.apply_single_qubit_gate_cache_aligned(target, gate_matrix, mask)?;
        }
        // All other layouts use the straightforward linear sweep.
        _ => {
            self.apply_single_qubit_gate_standard(target, gate_matrix, mask)?;
        }
    }
    self.update_bandwidth_monitor(bytes_touched, started.elapsed());
    Ok(())
}
/// Block-wise traversal of the single-qubit gate update, prefetching the
/// next block while processing the current one.
///
/// Bug fix: the original iterated `step_by(2)` and derived the pair from
/// `i & !mask`, which visits every (i0, i1) amplitude pair twice whenever
/// `target >= 1` — applying the gate twice (U² instead of U). Each pair is
/// now visited exactly once by only starting from indices whose target bit
/// is clear.
fn apply_single_qubit_gate_blocked(
    &mut self,
    _target: usize,
    gate_matrix: &Array2<Complex64>,
    mask: usize,
) -> Result<()> {
    // `.max(1)` avoids a divide-by-zero when `block_size` is smaller than
    // one element.
    let block_size = (self.block_size / std::mem::size_of::<Complex64>()).max(1);
    let num_blocks = self.data.len().div_ceil(block_size);
    for block_idx in 0..num_blocks {
        let start = block_idx * block_size;
        let end = std::cmp::min(start + block_size, self.data.len());
        // Hint the next block into cache while working on this one.
        if self.config.enable_prefetching && block_idx + 1 < num_blocks {
            let next_start = (block_idx + 1) * block_size;
            if next_start < self.data.len() {
                Self::prefetch_memory(&self.data[next_start]);
            }
        }
        for i0 in start..end {
            // Only indices with the target bit clear start a pair; the
            // partner index i1 is skipped when its own turn comes.
            if i0 & mask != 0 {
                continue;
            }
            let i1 = i0 | mask;
            if i1 < self.data.len() {
                let amp0 = self.data[i0];
                let amp1 = self.data[i1];
                self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
            }
        }
    }
    Ok(())
}
/// Cache-line-chunked traversal of the single-qubit gate update,
/// prefetching the next line while processing the current one.
///
/// Bug fix: as in the blocked kernel, the original `step_by(2)` iteration
/// processed every (i0, i1) pair twice for `target >= 1`, applying the
/// gate twice. Pairs are now visited exactly once.
fn apply_single_qubit_gate_cache_aligned(
    &mut self,
    _target: usize,
    gate_matrix: &Array2<Complex64>,
    mask: usize,
) -> Result<()> {
    // `.max(1)` keeps `step_by` valid even for tiny configured line sizes.
    let elements_per_line =
        (self.config.cache_line_size / std::mem::size_of::<Complex64>()).max(1);
    for chunk_start in (0..self.data.len()).step_by(elements_per_line) {
        let chunk_end = std::cmp::min(chunk_start + elements_per_line, self.data.len());
        if self.config.enable_prefetching && chunk_end < self.data.len() {
            Self::prefetch_memory(&self.data[chunk_end]);
        }
        for i0 in chunk_start..chunk_end {
            // Only indices with the target bit clear start a pair.
            if i0 & mask != 0 {
                continue;
            }
            let i1 = i0 | mask;
            if i1 < self.data.len() {
                let amp0 = self.data[i0];
                let amp1 = self.data[i1];
                self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
                self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
            }
        }
    }
    Ok(())
}
/// Linear-sweep single-qubit gate update.
///
/// Bug fix: the original iterated `step_by(2)` with `i0 = i & !mask`,
/// which for `target >= 1` maps distinct loop indices onto the same
/// (i0, i1) pair and therefore applies the gate twice per pair. The loop
/// now starts a pair only from indices whose target bit is clear, visiting
/// each pair exactly once.
fn apply_single_qubit_gate_standard(
    &mut self,
    _target: usize,
    gate_matrix: &Array2<Complex64>,
    mask: usize,
) -> Result<()> {
    for i0 in 0..self.data.len() {
        if i0 & mask != 0 {
            continue;
        }
        let i1 = i0 | mask;
        if i1 < self.data.len() {
            let amp0 = self.data[i0];
            let amp1 = self.data[i1];
            self.data[i0] = gate_matrix[[0, 0]] * amp0 + gate_matrix[[0, 1]] * amp1;
            self.data[i1] = gate_matrix[[1, 0]] * amp0 + gate_matrix[[1, 1]] * amp1;
        }
    }
    Ok(())
}
/// Issues a best-effort hint to pull the cache line containing `addr`
/// into cache ahead of use.
#[inline(always)]
fn prefetch_memory(addr: &Complex64) {
    // x86-64: dedicated prefetch instruction; T0 hints into all cache levels.
    #[cfg(target_arch = "x86_64")]
    {
        use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};
        // SAFETY: `_mm_prefetch` is a non-faulting hint, and `addr` is a
        // valid reference in any case.
        unsafe {
            _mm_prefetch(std::ptr::from_ref(addr).cast::<i8>(), _MM_HINT_T0);
        }
    }
    // AArch64: PRFM "prefetch for load, L1, keep (temporal)" hint.
    #[cfg(target_arch = "aarch64")]
    {
        // SAFETY: PRFM is a non-faulting hint instruction; `addr` is a
        // valid reference, and the asm neither touches the stack nor
        // writes memory or flags (see the options).
        unsafe {
            std::arch::asm!(
                "prfm pldl1keep, [{addr}]",
                addr = in(reg) std::ptr::from_ref(addr).cast::<u8>(),
                options(nostack, readonly, preserves_flags),
            );
        }
    }
    // Other architectures: fall back to a volatile read so the touch is
    // not optimized away, which still pulls the line into cache.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    {
        // SAFETY: `addr` is a valid, aligned reference; reading one byte
        // through it is sound.
        unsafe {
            let _ = std::ptr::read_volatile(std::ptr::from_ref(addr).cast::<u8>());
        }
    }
}
/// Applies a 4x4 `gate_matrix` to the (`control`, `target`) qubit pair,
/// choosing the traversal matching the current layout, and records a
/// bandwidth sample.
///
/// # Errors
/// Propagates errors from the selected kernel.
pub fn apply_two_qubit_gate_optimized(
    &mut self,
    control: usize,
    target: usize,
    gate_matrix: &Array2<Complex64>,
) -> Result<()> {
    let started = Instant::now();
    let control_mask = 1usize << control;
    let target_mask = 1usize << target;
    let bytes_touched = self.data.len() * std::mem::size_of::<Complex64>();
    if self.layout == MemoryLayout::Blocked {
        self.apply_two_qubit_gate_blocked(control_mask, target_mask, gate_matrix)?;
    } else {
        self.apply_two_qubit_gate_standard(control_mask, target_mask, gate_matrix)?;
    }
    self.update_bandwidth_monitor(bytes_touched, started.elapsed());
    Ok(())
}
/// Block-wise traversal of the two-qubit gate update with next-block
/// prefetching.
///
/// Bug fix: the original iterated `step_by(4)` and derived the group base
/// from `i & !(control_mask | target_mask)`, which maps distinct loop
/// indices onto the same 4-amplitude group for general control/target
/// bits — applying the gate multiple times per group. A group is now
/// started only from base indices with both bits clear, so each group is
/// processed exactly once. Assumes `control_mask != target_mask` — TODO
/// confirm callers never pass control == target.
fn apply_two_qubit_gate_blocked(
    &mut self,
    control_mask: usize,
    target_mask: usize,
    gate_matrix: &Array2<Complex64>,
) -> Result<()> {
    // `.max(1)` avoids a divide-by-zero for tiny configured block sizes.
    let block_size = (self.block_size / std::mem::size_of::<Complex64>()).max(1);
    let num_blocks = self.data.len().div_ceil(block_size);
    let pair_mask = control_mask | target_mask;
    for block_idx in 0..num_blocks {
        let start = block_idx * block_size;
        let end = std::cmp::min(start + block_size, self.data.len());
        // Hint the next block into cache while working on this one.
        if self.config.enable_prefetching && block_idx + 1 < num_blocks {
            let next_start = (block_idx + 1) * block_size;
            if next_start < self.data.len() {
                Self::prefetch_memory(&self.data[next_start]);
            }
        }
        for i00 in start..end {
            // Only base indices (control and target bits both 0) start a
            // 4-amplitude group.
            if i00 & pair_mask != 0 {
                continue;
            }
            let i01 = i00 | target_mask;
            let i10 = i00 | control_mask;
            let i11 = i00 | pair_mask;
            if i11 < self.data.len() {
                let amp00 = self.data[i00];
                let amp01 = self.data[i01];
                let amp10 = self.data[i10];
                let amp11 = self.data[i11];
                self.data[i00] = gate_matrix[[0, 0]] * amp00
                    + gate_matrix[[0, 1]] * amp01
                    + gate_matrix[[0, 2]] * amp10
                    + gate_matrix[[0, 3]] * amp11;
                self.data[i01] = gate_matrix[[1, 0]] * amp00
                    + gate_matrix[[1, 1]] * amp01
                    + gate_matrix[[1, 2]] * amp10
                    + gate_matrix[[1, 3]] * amp11;
                self.data[i10] = gate_matrix[[2, 0]] * amp00
                    + gate_matrix[[2, 1]] * amp01
                    + gate_matrix[[2, 2]] * amp10
                    + gate_matrix[[2, 3]] * amp11;
                self.data[i11] = gate_matrix[[3, 0]] * amp00
                    + gate_matrix[[3, 1]] * amp01
                    + gate_matrix[[3, 2]] * amp10
                    + gate_matrix[[3, 3]] * amp11;
            }
        }
    }
    Ok(())
}
/// Linear-sweep two-qubit gate update.
///
/// Bug fix: the original `step_by(4)` iteration with
/// `i & !(control_mask | target_mask)` processed the same 4-amplitude
/// group multiple times for general control/target bit positions. Groups
/// are now started only from base indices with both bits clear, visiting
/// each group exactly once. Assumes `control_mask != target_mask` — TODO
/// confirm callers never pass control == target.
fn apply_two_qubit_gate_standard(
    &mut self,
    control_mask: usize,
    target_mask: usize,
    gate_matrix: &Array2<Complex64>,
) -> Result<()> {
    let pair_mask = control_mask | target_mask;
    for i00 in 0..self.data.len() {
        if i00 & pair_mask != 0 {
            continue;
        }
        let i01 = i00 | target_mask;
        let i10 = i00 | control_mask;
        let i11 = i00 | pair_mask;
        if i11 < self.data.len() {
            let amp00 = self.data[i00];
            let amp01 = self.data[i01];
            let amp10 = self.data[i10];
            let amp11 = self.data[i11];
            self.data[i00] = gate_matrix[[0, 0]] * amp00
                + gate_matrix[[0, 1]] * amp01
                + gate_matrix[[0, 2]] * amp10
                + gate_matrix[[0, 3]] * amp11;
            self.data[i01] = gate_matrix[[1, 0]] * amp00
                + gate_matrix[[1, 1]] * amp01
                + gate_matrix[[1, 2]] * amp10
                + gate_matrix[[1, 3]] * amp11;
            self.data[i10] = gate_matrix[[2, 0]] * amp00
                + gate_matrix[[2, 1]] * amp01
                + gate_matrix[[2, 2]] * amp10
                + gate_matrix[[2, 3]] * amp11;
            self.data[i11] = gate_matrix[[3, 0]] * amp00
                + gate_matrix[[3, 1]] * amp01
                + gate_matrix[[3, 2]] * amp10
                + gate_matrix[[3, 3]] * amp11;
        }
    }
    Ok(())
}
/// Records one bandwidth sample (`bytes_accessed` over `elapsed`) and
/// refreshes the peak/average/utilization statistics.
///
/// Bug fix: a zero-length `elapsed` previously produced an infinite
/// bandwidth sample that permanently corrupted `peak_bandwidth` and
/// `average_bandwidth`; such samples are now skipped. A poisoned lock is
/// silently ignored (best-effort monitoring), matching the original.
fn update_bandwidth_monitor(&self, bytes_accessed: usize, elapsed: Duration) {
    let secs = elapsed.as_secs_f64();
    if secs <= 0.0 {
        return;
    }
    if let Ok(mut monitor) = self.bandwidth_monitor.write() {
        let bandwidth = bytes_accessed as f64 / secs;
        monitor.bandwidth_samples.push_back((Instant::now(), bandwidth));
        // Keep a bounded window of the 100 most recent samples.
        while monitor.bandwidth_samples.len() > 100 {
            monitor.bandwidth_samples.pop_front();
        }
        monitor.peak_bandwidth = monitor.peak_bandwidth.max(bandwidth);
        let sum: f64 = monitor.bandwidth_samples.iter().map(|(_, bw)| bw).sum();
        monitor.average_bandwidth = sum / monitor.bandwidth_samples.len() as f64;
        // Utilization is relative to an assumed 100 GiB/s peak — TODO:
        // confirm / make configurable for the actual machine.
        let theoretical_max = 100.0 * 1024.0 * 1024.0 * 1024.0;
        monitor.current_utilization = bandwidth / theoretical_max;
    }
}
/// Returns a snapshot of the current bandwidth statistics.
///
/// # Errors
/// Fails when the monitor's lock is poisoned.
pub fn get_bandwidth_stats(&self) -> Result<BandwidthMonitor> {
    let guard = self
        .bandwidth_monitor
        .read()
        .map_err(|e| SimulatorError::InvalidState(format!("RwLock poisoned: {e}")))?;
    Ok(guard.clone())
}
/// Re-evaluates the layout heuristics and, for an `Adaptive` vector,
/// switches to the concrete layout suggested by the observed access
/// pattern and bandwidth utilization.
///
/// Bug fix: the original replaced `self.data` with a *fresh zeroed*
/// allocation when switching layouts, silently destroying the quantum
/// state (not even the |0...0⟩ amplitude was restored). The existing
/// amplitudes are now copied into the new buffer.
///
/// # Errors
/// Fails when a statistics lock is poisoned or re-allocation fails.
pub fn adapt_memory_layout(&mut self) -> Result<()> {
    // Concrete layouts are fixed; only Adaptive re-tunes itself.
    if self.layout != MemoryLayout::Adaptive {
        return Ok(());
    }
    let (sequential_ratio, utilization) = {
        let access_pattern = self
            .access_pattern
            .read()
            .map_err(|e| SimulatorError::InvalidState(format!("RwLock poisoned: {e}")))?;
        let bandwidth_stats = self
            .bandwidth_monitor
            .read()
            .map_err(|e| SimulatorError::InvalidState(format!("RwLock poisoned: {e}")))?;
        // `+ 1.0` avoids division by zero when nothing has been recorded.
        let ratio = access_pattern.sequential_accesses.len() as f64
            / (access_pattern.total_accesses as f64 + 1.0);
        (ratio, bandwidth_stats.current_utilization)
    };
    let new_layout = if sequential_ratio > 0.8 {
        MemoryLayout::CacheAligned
    } else if utilization < 0.5 {
        MemoryLayout::Blocked
    } else {
        MemoryLayout::Hierarchical
    };
    if new_layout != self.layout {
        // Re-allocate in the new layout (zero-filled, possibly padded) and
        // carry the existing amplitudes across to preserve the state.
        let mut new_data = Self::allocate_with_layout(self.data.len(), new_layout, &self.config)?;
        let n = self.data.len().min(new_data.len());
        new_data[..n].copy_from_slice(&self.data[..n]);
        self.data = new_data;
        self.layout = new_layout;
    }
    Ok(())
}
/// Summarizes the current memory footprint and efficiency estimates.
#[must_use]
pub fn get_memory_stats(&self) -> MemoryStats {
    let bytes_per_amplitude = std::mem::size_of::<Complex64>();
    MemoryStats {
        total_memory: bytes_per_amplitude * self.data.len(),
        allocated_memory: bytes_per_amplitude * self.data.capacity(),
        layout: self.layout,
        cache_efficiency: self.calculate_cache_efficiency(),
        memory_utilization: self.calculate_memory_utilization(),
    }
}
/// Estimated cache hit rate in [0, 1]. A poisoned lock or an empty access
/// history both report full efficiency (1.0).
fn calculate_cache_efficiency(&self) -> f64 {
    let Ok(pattern) = self.access_pattern.read() else {
        return 1.0;
    };
    if pattern.total_accesses == 0 {
        return 1.0;
    }
    let miss_rate = pattern.cache_misses as f64 / pattern.total_accesses as f64;
    (1.0 - miss_rate).clamp(0.0, 1.0)
}
/// Most recent bandwidth utilization; 0.0 when the monitor lock is poisoned.
fn calculate_memory_utilization(&self) -> f64 {
    self.bandwidth_monitor
        .read()
        .map(|monitor| monitor.current_utilization)
        .unwrap_or(0.0)
}
/// Read-only view of the amplitude buffer (including any layout padding).
#[must_use]
pub fn data(&self) -> &[Complex64] {
    self.data.as_slice()
}
/// Mutable view of the amplitude buffer. Each call is counted as one
/// access for the adaptive-layout heuristics; a poisoned statistics lock
/// is ignored rather than propagated.
pub fn data_mut(&mut self) -> &mut [Complex64] {
    if let Ok(mut pattern) = self.access_pattern.write() {
        pattern.total_accesses += 1;
        pattern.last_access_time = Instant::now();
    }
    self.data.as_mut_slice()
}
}
/// Snapshot of an `OptimizedStateVector`'s memory footprint and efficiency.
#[derive(Debug, Clone)]
pub struct MemoryStats {
    /// Bytes occupied by the live amplitudes (`len * size_of::<Complex64>()`).
    pub total_memory: usize,
    /// Bytes reserved by the backing allocation (`capacity * element size`).
    pub allocated_memory: usize,
    /// Layout in effect when the snapshot was taken.
    pub layout: MemoryLayout,
    /// Estimated cache hit rate in [0, 1].
    pub cache_efficiency: f64,
    /// Most recent bandwidth utilization estimate.
    pub memory_utilization: f64,
}
/// Factory/driver that creates optimized state vectors and runs
/// memory-access optimization passes over them.
#[derive(Debug)]
pub struct MemoryBandwidthOptimizer {
    // Configuration cloned into every state vector this optimizer creates.
    config: MemoryOptimizationConfig,
    // Shared pool for auxiliary allocations (not used by the passes here — TODO confirm intended use).
    memory_pool: Arc<MemoryPool>,
    // Optional SciRS2 backend, set by `init_scirs2_backend`.
    backend: Option<SciRS2Backend>,
}
impl MemoryBandwidthOptimizer {
/// Builds an optimizer whose pool budget is split into 1024 blocks.
///
/// # Errors
/// Propagates pool-construction failures.
pub fn new(config: MemoryOptimizationConfig) -> Result<Self> {
    let pool = MemoryPool::new(config.memory_pool_size / 1024, 1024)?;
    Ok(Self {
        config,
        memory_pool: Arc::new(pool),
        backend: None,
    })
}
/// Lazily attaches a SciRS2 backend; replaces any previously set backend.
///
/// # Errors
/// Currently infallible; `Result` kept for interface stability.
pub fn init_scirs2_backend(&mut self) -> Result<()> {
    self.backend = Some(SciRS2Backend::new());
    Ok(())
}
/// Creates a state vector configured with this optimizer's settings.
///
/// # Errors
/// Propagates state-vector construction failures.
pub fn create_optimized_state_vector(&self, num_qubits: usize) -> Result<OptimizedStateVector> {
    let config = self.config.clone();
    OptimizedStateVector::new(num_qubits, config)
}
/// Runs the pre-circuit optimization pass: adapts the layout to observed
/// statistics and optionally warms the caches; returns a summary report.
///
/// # Errors
/// Propagates failures from layout adaptation or cache warm-up.
pub fn optimize_circuit_memory_access(
    &self,
    state_vector: &mut OptimizedStateVector,
    circuit_depth: usize,
) -> Result<MemoryOptimizationReport> {
    let started = Instant::now();
    // One touch per amplitude per circuit layer (measured before any
    // layout change can alter the padded length).
    let estimated_accesses = circuit_depth * state_vector.data.len();
    state_vector.adapt_memory_layout()?;
    if self.config.enable_prefetching {
        Self::warmup_caches(state_vector)?;
    }
    Ok(MemoryOptimizationReport {
        optimization_time: started.elapsed(),
        estimated_memory_accesses: estimated_accesses,
        cache_warmup_performed: self.config.enable_prefetching,
        layout_adaptation_performed: true,
        memory_stats: state_vector.get_memory_stats(),
    })
}
/// Touches a sample of elements in each cache-line-sized chunk so the
/// relevant lines are resident before the circuit starts.
///
/// Bug fix: the original used `step_by(chunk_size / 4)`, which panics
/// (step of 0) whenever `cache_line_size` holds fewer than four elements,
/// and `step_by(chunk_size)` would likewise panic when the line is smaller
/// than one element. Both strides are now clamped to at least 1.
///
/// # Errors
/// Currently infallible; `Result` kept for interface stability.
fn warmup_caches(state_vector: &OptimizedStateVector) -> Result<()> {
    let element_size = std::mem::size_of::<Complex64>();
    let chunk_size = (state_vector.config.cache_line_size / element_size).max(1);
    let stride = (chunk_size / 4).max(1);
    for chunk_start in (0..state_vector.data.len()).step_by(chunk_size) {
        let chunk_end = std::cmp::min(chunk_start + chunk_size, state_vector.data.len());
        for i in (chunk_start..chunk_end).step_by(stride) {
            // Reading the element pulls its cache line in.
            let _ = state_vector.data[i];
        }
    }
    Ok(())
}
}
/// Summary produced by `MemoryBandwidthOptimizer::optimize_circuit_memory_access`.
#[derive(Debug, Clone)]
pub struct MemoryOptimizationReport {
    /// Wall-clock duration of the optimization pass.
    pub optimization_time: Duration,
    /// `circuit_depth * state length` — one touch per amplitude per layer.
    pub estimated_memory_accesses: usize,
    /// Whether the cache warm-up step ran (mirrors `enable_prefetching`).
    pub cache_warmup_performed: bool,
    /// Whether layout adaptation was attempted (always true currently).
    pub layout_adaptation_performed: bool,
    /// Memory snapshot taken after the pass completed.
    pub memory_stats: MemoryStats,
}
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    // A 3-qubit vector has 2^3 = 8 amplitudes and starts in |000⟩.
    #[test]
    fn test_optimized_state_vector_creation() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(3, config)
            .expect("OptimizedStateVector creation should succeed");
        assert_eq!(state_vector.num_qubits, 3);
        assert_eq!(state_vector.data.len(), 8);
        assert_eq!(state_vector.data[0], Complex64::new(1.0, 0.0));
    }

    // A concrete (non-adaptive) layout is preserved exactly as configured.
    #[test]
    fn test_memory_layouts() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::CacheAligned,
            ..Default::default()
        };
        let state_vector = OptimizedStateVector::new(4, config)
            .expect("OptimizedStateVector with CacheAligned layout should be created");
        assert_eq!(state_vector.layout, MemoryLayout::CacheAligned);
    }

    // Applying Pauli-X to qubit 0 of |00⟩ moves the amplitude from index 0
    // to index 1.
    #[test]
    fn test_single_qubit_gate_optimization() {
        let config = MemoryOptimizationConfig::default();
        let mut state_vector = OptimizedStateVector::new(2, config)
            .expect("OptimizedStateVector creation should succeed");
        // Pauli-X in row-major order.
        let gate_matrix = Array2::from_shape_vec(
            (2, 2),
            vec![
                Complex64::new(0.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(1.0, 0.0),
                Complex64::new(0.0, 0.0),
            ],
        )
        .expect("Gate matrix construction should succeed");
        state_vector
            .apply_single_qubit_gate_optimized(0, &gate_matrix)
            .expect("Single qubit gate application should succeed");
        assert!((state_vector.data[1].re - 1.0).abs() < 1e-10);
        assert!(state_vector.data[0].re.abs() < 1e-10);
    }

    // Before any gate is applied, the sample window is empty.
    #[test]
    fn test_bandwidth_monitoring() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(3, config)
            .expect("OptimizedStateVector creation should succeed");
        let stats = state_vector
            .get_bandwidth_stats()
            .expect("Bandwidth stats retrieval should succeed");
        assert_eq!(stats.bandwidth_samples.len(), 0);
    }

    // Blocks can be allocated and returned to the pool without error.
    #[test]
    fn test_memory_pool() {
        let pool = MemoryPool::new(1024, 10).expect("MemoryPool creation should succeed");
        let ptr1 = pool.allocate().expect("First allocation should succeed");
        let ptr2 = pool.allocate().expect("Second allocation should succeed");
        pool.deallocate(ptr1)
            .expect("First deallocation should succeed");
        pool.deallocate(ptr2)
            .expect("Second deallocation should succeed");
    }

    // Cache-aligned allocation pads the length up to a whole number of
    // cache lines.
    #[test]
    fn test_cache_aligned_allocation() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::CacheAligned,
            cache_line_size: 64,
            ..Default::default()
        };
        let data = OptimizedStateVector::allocate_cache_aligned(100, &config)
            .expect("Cache-aligned allocation should succeed");
        let element_size = std::mem::size_of::<Complex64>();
        let elements_per_line = config.cache_line_size / element_size;
        let expected_padded = 100_usize.div_ceil(elements_per_line) * elements_per_line;
        assert_eq!(data.len(), expected_padded);
    }

    // End-to-end optimizer run over a 4-qubit (16-amplitude) vector with
    // circuit depth 10 estimates 160 accesses.
    #[test]
    fn test_memory_bandwidth_optimizer() {
        let config = MemoryOptimizationConfig::default();
        let optimizer = MemoryBandwidthOptimizer::new(config)
            .expect("MemoryBandwidthOptimizer creation should succeed");
        let mut state_vector = optimizer
            .create_optimized_state_vector(4)
            .expect("Optimized state vector creation should succeed");
        let report = optimizer
            .optimize_circuit_memory_access(&mut state_vector, 10)
            .expect("Circuit memory optimization should succeed");
        assert!(report.optimization_time.as_millis() < u128::MAX);
        assert_eq!(report.estimated_memory_accesses, 10 * 16);
    }

    // After adaptation, an Adaptive vector must have settled on one of the
    // concrete layouts.
    #[test]
    fn test_adaptive_layout() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::Adaptive,
            ..Default::default()
        };
        let mut state_vector = OptimizedStateVector::new(3, config)
            .expect("OptimizedStateVector with Adaptive layout should be created");
        state_vector
            .adapt_memory_layout()
            .expect("Memory layout adaptation should succeed");
        assert!(matches!(
            state_vector.layout,
            MemoryLayout::CacheAligned | MemoryLayout::Blocked | MemoryLayout::Hierarchical
        ));
    }

    // Stats for a 4-qubit vector report 16 amplitudes' worth of memory and
    // a cache efficiency in [0, 1].
    #[test]
    fn test_memory_stats() {
        let config = MemoryOptimizationConfig::default();
        let state_vector = OptimizedStateVector::new(4, config)
            .expect("OptimizedStateVector creation should succeed");
        let stats = state_vector.get_memory_stats();
        assert_eq!(stats.total_memory, 16 * std::mem::size_of::<Complex64>());
        assert!(stats.cache_efficiency >= 0.0 && stats.cache_efficiency <= 1.0);
    }

    // Blocked allocation preserves the requested logical length.
    #[test]
    fn test_blocked_layout_allocation() {
        let config = MemoryOptimizationConfig {
            layout: MemoryLayout::Blocked,
            block_size: 1024,
            ..Default::default()
        };
        let data = OptimizedStateVector::allocate_blocked(100, &config)
            .expect("Blocked layout allocation should succeed");
        assert_eq!(data.len(), 100);
    }

    // Smoke test: the prefetch hint must be callable on every architecture
    // without side effects.
    #[test]
    fn test_prefetch_functionality() {
        let config = MemoryOptimizationConfig {
            enable_prefetching: true,
            prefetch_distance: 4,
            ..Default::default()
        };
        let state_vector = OptimizedStateVector::new(5, config)
            .expect("OptimizedStateVector with prefetching enabled should be created");
        OptimizedStateVector::prefetch_memory(&state_vector.data[0]);
    }
}