//! Cache-aware, prefetch-assisted matrix kernels with runtime profiling and
//! adaptive selection of a vectorization strategy.
use crate::error::{LinalgError, LinalgResult};
use scirs2_core::ndarray::{Array2, ArrayView2, ArrayViewMut2};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{Duration, Instant};
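/// Collects memory access statistics and recommends a software-prefetch
/// strategy. The counters are relaxed atomics so accesses can be recorded
/// from hot loops without locking.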
#[derive(Debug)]
pub struct MemoryAccessPatternAnalyzer {
sequential_access_count: AtomicU64,
random_access_count: AtomicU64,
stride_access_patterns: Vec<(usize, u64)>,
#[allow(dead_code)]
predicted_miss_rate: f64,
}
impl MemoryAccessPatternAnalyzer {
pub fn new() -> Self {
Self {
sequential_access_count: AtomicU64::new(0),
random_access_count: AtomicU64::new(0),
stride_access_patterns: Vec::new(),
predicted_miss_rate: 0.05,
}
}
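/// Recommends a prefetch strategy from the matrix dimensions alone: larger
/// problems get longer prefetch distances and hotter cache hints.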
pub fn analyze_and_recommend_prefetch(&self, matrix_dims: (usize, usize)) -> PrefetchStrategy {
let (m, n) = matrix_dims;
let total_elements = m * n;
if total_elements > 1_000_000 {
PrefetchStrategy::Aggressive {
prefetch_distance: 8,
prefetch_hint: PrefetchHint::T0,
}
} else if total_elements > 100_000 {
PrefetchStrategy::Moderate {
prefetch_distance: 4,
prefetch_hint: PrefetchHint::T1,
}
} else {
PrefetchStrategy::Conservative {
prefetch_distance: 2,
prefetch_hint: PrefetchHint::T2,
}
}
}
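/// Records a single observed access, bumping the matching counter or the
/// per-stride histogram entry.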
pub fn record_access_pattern(&mut self, access_type: AccessType) {
match access_type {
AccessType::Sequential => {
self.sequential_access_count.fetch_add(1, Ordering::Relaxed);
}
AccessType::Random => {
self.random_access_count.fetch_add(1, Ordering::Relaxed);
}
AccessType::Strided(stride) => {
if let Some(pattern) = self
.stride_access_patterns
.iter_mut()
.find(|(s, _)| *s == stride)
{
pattern.1 += 1;
} else {
self.stride_access_patterns.push((stride, 1));
}
}
}
}
}
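/// Classification of a single memory access as seen by the analyzer.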
#[derive(Debug, Clone, Copy)]
pub enum AccessType {
Sequential,
Random,
Strided(usize),
}
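/// How far ahead to prefetch and which cache level to target.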
#[derive(Debug, Clone, Copy)]
pub enum PrefetchStrategy {
Conservative {
prefetch_distance: usize,
prefetch_hint: PrefetchHint,
},
Moderate {
prefetch_distance: usize,
prefetch_hint: PrefetchHint,
},
Aggressive {
prefetch_distance: usize,
prefetch_hint: PrefetchHint,
},
}
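/// Cache-level hints mirroring the x86 `_MM_HINT_*` constants
/// (`T0` = all levels, `NTA` = non-temporal).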
#[derive(Debug, Clone, Copy)]
pub enum PrefetchHint {
T0,
T1,
T2,
NTA,
}
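/// Matrix operations blocked around a three-level cache hierarchy. The cache
/// sizes are fixed, typical desktop defaults (32 KiB L1, 512 KiB L2, 8 MiB
/// L3); they are not detected at runtime.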
pub struct CacheAwareMatrixOperations {
l1_cache_size: usize,
l2_cache_size: usize,
l3_cache_size: usize,
#[allow(dead_code)]
cache_line_size: usize,
pattern_analyzer: MemoryAccessPatternAnalyzer,
}
impl CacheAwareMatrixOperations {
pub fn new() -> Self {
Self {
l1_cache_size: 32 * 1024,       // 32 KiB L1 data cache (typical default)
l2_cache_size: 512 * 1024,      // 512 KiB L2
l3_cache_size: 8 * 1024 * 1024, // 8 MiB shared L3
cache_line_size: 64,
pattern_analyzer: MemoryAccessPatternAnalyzer::new(),
}
}
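/// Derives square block edge lengths for each cache level, budgeting about a
/// third of the level per GEMM operand and clamping to sane maxima.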
pub fn calculate_optimal_blocksizes(&self, element_size: usize) -> CacheBlockSizes {
// Budget roughly a third of each cache level per GEMM operand, then take
// the square root for a square block edge.
let l1_elements = (self.l1_cache_size / 3) / element_size;
let l1_blocksize = (l1_elements as f64).sqrt() as usize;
let l2_elements = (self.l2_cache_size / 3) / element_size;
let l2_blocksize = (l2_elements as f64).sqrt() as usize;
let l3_elements = (self.l3_cache_size / 3) / element_size;
let l3_blocksize = (l3_elements as f64).sqrt() as usize;
CacheBlockSizes {
l1_block_m: l1_blocksize.min(256),
l1_block_n: l1_blocksize.min(256),
l1_block_k: l1_blocksize.min(256),
l2_block_m: l2_blocksize.min(1024),
l2_block_n: l2_blocksize.min(1024),
l2_block_k: l2_blocksize.min(1024),
l3_block_m: l3_blocksize.min(4096),
l3_block_n: l3_blocksize.min(4096),
l3_block_k: l3_blocksize.min(4096),
}
}
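/// Computes `C += A * B` for `f32` matrices with three-level cache blocking
/// and software prefetch. A minimal usage sketch, mirroring the unit test
/// below (marked `ignore` because the doctest lacks the crate path):
///
/// ```ignore
/// let a = array![[1.0f32, 2.0, 3.0], [4.0, 5.0, 6.0]];
/// let b = array![[7.0f32, 8.0], [9.0, 10.0], [11.0, 12.0]];
/// let mut c = Array2::<f32>::zeros((2, 2));
/// let mut ops = CacheAwareMatrixOperations::new();
/// ops.cache_aware_gemm_f32(&a.view(), &b.view(), &mut c.view_mut())?;
/// // c is now [[58.0, 64.0], [139.0, 154.0]]
/// ```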
pub fn cache_aware_gemm_f32(
&mut self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
) -> LinalgResult<()> {
let (m, k) = a.dim();
let (_, n) = b.dim();
if k != b.nrows() || m != c.nrows() || n != c.ncols() {
return Err(LinalgError::ShapeError(
"Matrix dimensions incompatible for multiplication".to_string(),
));
}
let blocksizes = self.calculate_optimal_blocksizes(std::mem::size_of::<f32>());
let prefetch_strategy = self.pattern_analyzer.analyze_and_recommend_prefetch((m, n));
self.three_level_blocked_gemm(a, b, c, &blocksizes, &prefetch_strategy)?;
Ok(())
}
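/// L3- and L2-level blocking loops; the innermost L1 tiles are handled by
/// `l1_blocked_gemm_with_prefetch`.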
fn three_level_blocked_gemm(
&mut self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
blocksizes: &CacheBlockSizes,
prefetch_strategy: &PrefetchStrategy,
) -> LinalgResult<()> {
let (m, k) = a.dim();
let (_, n) = b.dim();
for ii in (0..m).step_by(blocksizes.l3_block_m) {
for jj in (0..n).step_by(blocksizes.l3_block_n) {
for kk in (0..k).step_by(blocksizes.l3_block_k) {
let i_end = (ii + blocksizes.l3_block_m).min(m);
let j_end = (jj + blocksizes.l3_block_n).min(n);
let k_end = (kk + blocksizes.l3_block_k).min(k);
for i2 in (ii..i_end).step_by(blocksizes.l2_block_m) {
for j2 in (jj..j_end).step_by(blocksizes.l2_block_n) {
for k2 in (kk..k_end).step_by(blocksizes.l2_block_k) {
let i2_end = (i2 + blocksizes.l2_block_m).min(i_end);
let j2_end = (j2 + blocksizes.l2_block_n).min(j_end);
let k2_end = (k2 + blocksizes.l2_block_k).min(k_end);
self.l1_blocked_gemm_with_prefetch(
a,
b,
c,
i2,
i2_end,
j2,
j2_end,
k2,
k2_end,
blocksizes,
prefetch_strategy,
)?;
}
}
}
}
}
}
Ok(())
}
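/// Innermost blocking level: walks L1-sized tiles, issuing prefetches ahead
/// of a scalar multiply-accumulate loop.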
fn l1_blocked_gemm_with_prefetch(
&mut self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &mut ArrayViewMut2<f32>,
i_start: usize,
i_end: usize,
j_start: usize,
j_end: usize,
k_start: usize,
k_end: usize,
blocksizes: &CacheBlockSizes,
prefetch_strategy: &PrefetchStrategy,
) -> LinalgResult<()> {
for i in (i_start..i_end).step_by(blocksizes.l1_block_m) {
for j in (j_start..j_end).step_by(blocksizes.l1_block_n) {
for k_iter in (k_start..k_end).step_by(blocksizes.l1_block_k) {
let i_block_end = (i + blocksizes.l1_block_m).min(i_end);
let j_block_end = (j + blocksizes.l1_block_n).min(j_end);
let k_block_end = (k_iter + blocksizes.l1_block_k).min(k_end);
self.intelligent_prefetch(a, b, c, i, j, k_iter, prefetch_strategy);
for ii in i..i_block_end {
for jj in j..j_block_end {
let mut sum = 0.0f32;
for kk in k_iter..k_block_end {
sum += a[[ii, kk]] * b[[kk, jj]];
}
c[[ii, jj]] += sum;
}
}
}
}
}
self.pattern_analyzer
.record_access_pattern(AccessType::Sequential);
Ok(())
}
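/// Issues prefetches for upcoming elements of `a`, `b`, and `c` according to
/// the chosen strategy; compiles to a no-op on non-x86_64 targets.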
fn intelligent_prefetch(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
c: &ArrayViewMut2<f32>,
i: usize,
j: usize,
k: usize,
strategy: &PrefetchStrategy,
) {
let (prefetch_distance, hint) = match strategy {
PrefetchStrategy::Conservative {
prefetch_distance,
prefetch_hint,
} => (*prefetch_distance, *prefetch_hint),
PrefetchStrategy::Moderate {
prefetch_distance,
prefetch_hint,
} => (*prefetch_distance, *prefetch_hint),
PrefetchStrategy::Aggressive {
prefetch_distance,
prefetch_hint,
} => (*prefetch_distance, *prefetch_hint),
};
#[cfg(target_arch = "x86_64")]
unsafe {
macro_rules! prefetch_with_hint {
($ptr:expr, $hint:expr) => {
match $hint {
// `_mm_prefetch` takes its hint as a const generic on current Rust.
PrefetchHint::T0 => _mm_prefetch::<_MM_HINT_T0>($ptr as *const i8),
PrefetchHint::T1 => _mm_prefetch::<_MM_HINT_T1>($ptr as *const i8),
PrefetchHint::T2 => _mm_prefetch::<_MM_HINT_T2>($ptr as *const i8),
PrefetchHint::NTA => _mm_prefetch::<_MM_HINT_NTA>($ptr as *const i8),
}
};
}
if i + prefetch_distance < a.nrows() {
let a_ptr = &a[[i + prefetch_distance, k]] as *const f32;
prefetch_with_hint!(a_ptr, hint);
}
if j + prefetch_distance < b.ncols() {
let b_ptr = &b[[k, j + prefetch_distance]] as *const f32;
prefetch_with_hint!(b_ptr, hint);
}
if i + prefetch_distance < c.nrows() && j + prefetch_distance < c.ncols() {
let c_ptr = &c[[i + prefetch_distance, j + prefetch_distance]] as *const f32;
prefetch_with_hint!(c_ptr, hint);
}
}
#[cfg(not(target_arch = "x86_64"))]
{
let _ = (a, b, c, i, j, k, strategy);
}
}
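/// Blocked transpose, tiled so an input block and its output block together
/// fit in the L1 cache.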
pub fn cache_aware_transpose_f32(
&mut self,
input: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
let (rows, cols) = input.dim();
let mut result = Array2::zeros((cols, rows));
let element_size = std::mem::size_of::<f32>();
// Size tiles so an input block and its output block together fit in L1,
// capping the edge length at 64.
let l1_elements = (self.l1_cache_size / 2) / element_size;
let blocksize = ((l1_elements as f64).sqrt() as usize).min(64);
for i in (0..rows).step_by(blocksize) {
for j in (0..cols).step_by(blocksize) {
let i_end = (i + blocksize).min(rows);
let j_end = (j + blocksize).min(cols);
for ii in i..i_end {
for jj in j..j_end {
result[[jj, ii]] = input[[ii, jj]];
}
}
}
}
self.pattern_analyzer
.record_access_pattern(AccessType::Strided(rows));
Ok(result)
}
}
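/// Block edge lengths per cache level for the m, n, and k GEMM dimensions.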
#[derive(Debug, Clone)]
pub struct CacheBlockSizes {
pub l1_block_m: usize,
pub l1_block_n: usize,
pub l1_block_k: usize,
pub l2_block_m: usize,
pub l2_block_n: usize,
pub l2_block_k: usize,
pub l3_block_m: usize,
pub l3_block_n: usize,
pub l3_block_k: usize,
}
pub struct RuntimePerformanceProfiler {
timing_history: Vec<(String, Duration)>,
cache_miss_rates: Vec<f64>,
#[allow(dead_code)]
optimization_scores: Vec<f64>,
session_start: Option<Instant>,
}
impl RuntimePerformanceProfiler {
pub fn new() -> Self {
Self {
timing_history: Vec::new(),
cache_miss_rates: Vec::new(),
optimization_scores: Vec::new(),
session_start: None,
}
}
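/// Starts timing a named operation; the duration is filled in by
/// `end_session`.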
pub fn start_session(&mut self, operation_name: &str) {
self.session_start = Some(Instant::now());
self.timing_history
.push((operation_name.to_string(), Duration::ZERO));
}
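/// Stops the current session and backfills the elapsed time into the most
/// recent history entry, returning it.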
pub fn end_session(&mut self) -> Option<Duration> {
if let Some(start_time) = self.session_start.take() {
let duration = start_time.elapsed();
if let Some(last_entry) = self.timing_history.last_mut() {
last_entry.1 = duration;
}
Some(duration)
} else {
None
}
}
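/// Heuristic recommendations: average times above 100 ms suggest larger
/// blocks and aggressive prefetch, below 10 ms smaller blocks, and miss
/// rates above 10% suggest memory-layout work.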
pub fn analyze_and_recommend(&self) -> Vec<OptimizationRecommendation> {
let mut recommendations = Vec::new();
if let Some(avg_time) = self.calculate_average_operation_time() {
if avg_time > Duration::from_millis(100) {
recommendations.push(OptimizationRecommendation::IncreaseBlockSize);
recommendations.push(OptimizationRecommendation::EnableAggressivePrefetch);
} else if avg_time < Duration::from_millis(10) {
recommendations.push(OptimizationRecommendation::DecreaseBlockSize);
}
}
if let Some(avg_miss_rate) = self.calculate_average_cache_miss_rate() {
if avg_miss_rate > 0.1 {
recommendations.push(OptimizationRecommendation::OptimizeMemoryLayout);
recommendations.push(OptimizationRecommendation::IncreaseBlockSize);
}
}
recommendations
}
fn calculate_average_operation_time(&self) -> Option<Duration> {
if self.timing_history.is_empty() {
return None;
}
let total_nanos: u64 = self
.timing_history
.iter()
.map(|(_, duration)| duration.as_nanos() as u64)
.sum();
Some(Duration::from_nanos(
total_nanos / self.timing_history.len() as u64,
))
}
fn calculate_average_cache_miss_rate(&self) -> Option<f64> {
if self.cache_miss_rates.is_empty() {
return None;
}
Some(self.cache_miss_rates.iter().sum::<f64>() / self.cache_miss_rates.len() as f64)
}
}
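/// Tuning actions the profiler can suggest.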
#[derive(Debug, Clone)]
pub enum OptimizationRecommendation {
IncreaseBlockSize,
DecreaseBlockSize,
EnableAggressivePrefetch,
OptimizeMemoryLayout,
SwitchToSIMDImplementation,
UseParallelExecution,
}
impl Default for MemoryAccessPatternAnalyzer {
fn default() -> Self {
Self::new()
}
}
impl Default for CacheAwareMatrixOperations {
fn default() -> Self {
Self::new()
}
}
impl Default for RuntimePerformanceProfiler {
fn default() -> Self {
Self::new()
}
}
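/// Branch- and loop-shaping helpers. Stable Rust exposes no likely/unlikely
/// intrinsics, so the branch helpers only document intent.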
pub struct BranchOptimizer;
impl BranchOptimizer {
#[allow(dead_code)]
#[inline(always)]
pub fn likely_branch<T>(condition: bool, if_true: T, if_false: T) -> T {
if condition {
if_true
} else {
if_false
}
}
#[allow(dead_code)]
#[inline(always)]
pub fn unlikely_branch<T>(condition: bool, if_true: T, if_false: T) -> T {
if condition {
if_true
} else {
if_false
}
}
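/// Applies `operation` over `start..end`, unrolled by `unroll_factor`, with
/// a scalar loop for the remainder. Despite the name, it does not itself
/// issue prefetches.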
#[allow(dead_code)]
pub fn unrolled_loop_with_prefetch<F>(
start: usize,
end: usize,
unroll_factor: usize,
mut operation: F,
) where
F: FnMut(usize),
{
let mut i = start;
while i + unroll_factor <= end {
for offset in 0..unroll_factor {
operation(i + offset);
}
i += unroll_factor;
}
while i < end {
operation(i);
i += 1;
}
}
}
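/// Selects a SIMD strategy from detected CPU features and problem size,
/// optionally recording per-strategy timings for auto-tuning.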
pub struct AdaptiveVectorizationEngine {
cpu_features: CpuFeatures,
strategy_performance: std::collections::HashMap<VectorizationStrategy, f64>,
auto_tuning_enabled: bool,
}
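/// Available SIMD instruction-set tiers, plus a scalar fallback.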
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorizationStrategy {
SSE42,
AVX,
AVX2,
AVX512,
Scalar,
}
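/// SIMD-related CPU features; detected at runtime on x86_64 and reported as
/// unavailable elsewhere.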
#[derive(Debug, Clone)]
pub struct CpuFeatures {
pub sse42: bool,
pub avx: bool,
pub avx2: bool,
pub avx512: bool,
pub fma: bool,
pub cache_line_size: usize,
}
impl AdaptiveVectorizationEngine {
pub fn new() -> Self {
let cpu_features = Self::detect_cpu_features();
Self {
cpu_features,
strategy_performance: std::collections::HashMap::new(),
auto_tuning_enabled: true,
}
}
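/// Runtime feature detection via `is_x86_feature_detected!`.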
#[allow(dead_code)]
fn detect_cpu_features() -> CpuFeatures {
#[cfg(target_arch = "x86_64")]
{
CpuFeatures {
sse42: is_x86_feature_detected!("sse4.2"),
avx: is_x86_feature_detected!("avx"),
avx2: is_x86_feature_detected!("avx2"),
avx512: is_x86_feature_detected!("avx512f"),
fma: is_x86_feature_detected!("fma"),
cache_line_size: 64,
}
}
#[cfg(not(target_arch = "x86_64"))]
{
CpuFeatures {
sse42: false,
avx: false,
avx2: false,
avx512: false,
fma: false,
cache_line_size: 64,
}
}
}
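/// Picks the widest supported SIMD tier the matrix is large enough to
/// amortize (thresholds at 100_000, 10_000, and 1_000 elements); anything
/// smaller stays scalar.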
pub fn select_optimal_strategy(&self, matrix_size: (usize, usize)) -> VectorizationStrategy {
let (rows, cols) = matrix_size;
let total_elements = rows * cols;
if total_elements > 100_000 {
if self.cpu_features.avx512 {
return VectorizationStrategy::AVX512;
} else if self.cpu_features.avx2 {
return VectorizationStrategy::AVX2;
} else if self.cpu_features.avx {
return VectorizationStrategy::AVX;
}
}
if total_elements > 10_000 {
if self.cpu_features.avx2 {
return VectorizationStrategy::AVX2;
} else if self.cpu_features.avx {
return VectorizationStrategy::AVX;
} else if self.cpu_features.sse42 {
return VectorizationStrategy::SSE42;
}
}
if self.cpu_features.sse42 && total_elements > 1_000 {
VectorizationStrategy::SSE42
} else {
VectorizationStrategy::Scalar
}
}
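/// Multiplies two `f32` matrices using the strategy chosen by
/// `select_optimal_strategy`, recording the elapsed time when auto-tuning is
/// enabled.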
#[allow(dead_code)]
pub fn adaptive_matrix_multiply_f32(
&mut self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
let start_time = Instant::now();
let strategy = self.select_optimal_strategy((a.nrows(), a.ncols()));
let result = match strategy {
VectorizationStrategy::AVX512 => self.matrix_multiply_avx512_f32(a, b),
VectorizationStrategy::AVX2 => self.matrix_multiply_avx2_f32(a, b),
VectorizationStrategy::AVX => self.matrix_multiply_avx_f32(a, b),
VectorizationStrategy::SSE42 => self.matrix_multiply_sse42_f32(a, b),
VectorizationStrategy::Scalar => self.matrix_multiply_scalar_f32(a, b),
};
if self.auto_tuning_enabled {
let duration = start_time.elapsed().as_secs_f64();
self.strategy_performance.insert(strategy, duration);
}
result
}
#[allow(dead_code)]
fn matrix_multiply_avx512_f32(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
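// No dedicated AVX-512 kernel yet; delegate to the AVX2-oriented blocked path.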
self.matrix_multiply_avx2_f32(a, b)
}
#[allow(dead_code)]
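/// Cache-blocked kernel whose 8-wide inner loop is arranged for
/// auto-vectorization; it does not invoke AVX2 intrinsics directly.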
fn matrix_multiply_avx2_f32(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
if a.ncols() != b.nrows() {
return Err(LinalgError::ShapeError(
"Matrix dimensions incompatible for multiplication".to_string(),
));
}
let (m, k) = a.dim();
let n = b.ncols();
let mut result = Array2::zeros((m, n));
const BLOCK_SIZE: usize = 64;
for i in (0..m).step_by(BLOCK_SIZE) {
for j in (0..n).step_by(BLOCK_SIZE) {
for kk in (0..k).step_by(BLOCK_SIZE) {
let i_end = (i + BLOCK_SIZE).min(m);
let j_end = (j + BLOCK_SIZE).min(n);
let k_end = (kk + BLOCK_SIZE).min(k);
for ii in i..i_end {
for jj in (j..j_end).step_by(8) {
let jj_end = (jj + 8).min(j_end);
for kkk in kk..k_end {
let a_val = a[[ii, kkk]];
for jjj in jj..jj_end {
result[[ii, jjj]] += a_val * b[[kkk, jjj]];
}
}
}
}
}
}
}
Ok(result)
}
#[allow(dead_code)]
fn matrix_multiply_avx_f32(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
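// No dedicated AVX kernel; fall back to the scalar implementation.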
self.matrix_multiply_scalar_f32(a, b)
}
#[allow(dead_code)]
fn matrix_multiply_sse42_f32(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
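// No dedicated SSE4.2 kernel; fall back to the scalar implementation.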
self.matrix_multiply_scalar_f32(a, b)
}
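/// Reference triple-loop multiply used as the portable fallback.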
fn matrix_multiply_scalar_f32(
&self,
a: &ArrayView2<f32>,
b: &ArrayView2<f32>,
) -> LinalgResult<Array2<f32>> {
if a.ncols() != b.nrows() {
return Err(LinalgError::ShapeError(
"Matrix dimensions incompatible for multiplication".to_string(),
));
}
let (m, k) = a.dim();
let n = b.ncols();
let mut result = Array2::zeros((m, n));
for i in 0..m {
for j in 0..n {
for kk in 0..k {
result[[i, j]] += a[[i, kk]] * b[[kk, j]];
}
}
}
Ok(result)
}
#[allow(dead_code)]
pub fn get_performance_report(&self) -> std::collections::HashMap<VectorizationStrategy, f64> {
self.strategy_performance.clone()
}
#[allow(dead_code)]
pub fn set_auto_tuning(&mut self, enabled: bool) {
self.auto_tuning_enabled = enabled;
}
}
impl Default for AdaptiveVectorizationEngine {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::array;
#[test]
fn test_cache_aware_matrix_operations() {
let mut cache_ops = CacheAwareMatrixOperations::new();
let a = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
let b = array![[7.0, 8.0], [9.0, 10.0], [11.0, 12.0]];
let mut c = Array2::zeros((2, 2));
let result = cache_ops.cache_aware_gemm_f32(&a.view(), &b.view(), &mut c.view_mut());
assert!(result.is_ok());
assert_abs_diff_eq!(c[[0, 0]], 58.0, epsilon = 1e-6);
assert_abs_diff_eq!(c[[0, 1]], 64.0, epsilon = 1e-6);
assert_abs_diff_eq!(c[[1, 0]], 139.0, epsilon = 1e-6);
assert_abs_diff_eq!(c[[1, 1]], 154.0, epsilon = 1e-6);
}
#[test]
fn test_cache_aware_transpose() {
let mut cache_ops = CacheAwareMatrixOperations::new();
let input = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
let result = cache_ops
.cache_aware_transpose_f32(&input.view())
.expect("Operation failed");
let expected = array![[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]];
for (actual, expected) in result.iter().zip(expected.iter()) {
assert_abs_diff_eq!(*actual, *expected, epsilon = 1e-10);
}
}
#[test]
fn test_memory_access_pattern_analyzer() {
let analyzer = MemoryAccessPatternAnalyzer::new();
let strategy = analyzer.analyze_and_recommend_prefetch((1000, 1000));
if let PrefetchStrategy::Aggressive {
prefetch_distance, ..
} = strategy
{
assert!(prefetch_distance > 0);
}
}
#[test]
fn test_runtime_performance_profiler() {
let mut profiler = RuntimePerformanceProfiler::new();
profiler.start_session("test_operation");
std::thread::sleep(Duration::from_millis(1));
let duration = profiler.end_session();
assert!(duration.is_some());
assert!(duration.expect("Operation failed") >= Duration::from_millis(1));
let recommendations = profiler.analyze_and_recommend();
assert!(!recommendations.is_empty() || profiler.timing_history.len() < 2);
}
#[test]
fn test_branch_optimizer() {
let result1 = BranchOptimizer::likely_branch(true, 42, 0);
assert_eq!(result1, 42);
let result2 = BranchOptimizer::unlikely_branch(false, 0, 42);
assert_eq!(result2, 42);
}
#[test]
fn test_adaptive_vectorization_engine() {
let mut engine = AdaptiveVectorizationEngine::new();
let features = &engine.cpu_features;
assert!(features.cache_line_size > 0);
let small_strategy = engine.select_optimal_strategy((10, 10));
let medium_strategy = engine.select_optimal_strategy((100, 100));
let large_strategy = engine.select_optimal_strategy((1000, 1000));
assert!(matches!(
small_strategy,
VectorizationStrategy::Scalar | VectorizationStrategy::SSE42
));
println!("Small matrix strategy: {:?}", small_strategy);
println!("Medium matrix strategy: {:?}", medium_strategy);
println!("Large matrix strategy: {:?}", large_strategy);
let a = array![[1.0f32, 2.0], [3.0, 4.0]];
let b = array![[5.0f32, 6.0], [7.0, 8.0]];
let result = engine
.adaptive_matrix_multiply_f32(&a.view(), &b.view())
.expect("Operation failed");
let expected = array![[19.0f32, 22.0], [43.0, 50.0]];
for (actual, expected) in result.iter().zip(expected.iter()) {
assert_abs_diff_eq!(*actual, *expected, epsilon = 1e-10);
}
engine.set_auto_tuning(false);
let performance_report = engine.get_performance_report();
assert!(!performance_report.is_empty());
}
}