use crate::algorithms::{Algorithm, AlgorithmStats};
use crate::error::Result;
use crate::memory::cache_layout::{CacheHierarchy, detect_cache_hierarchy};
use crate::memory::SecureMemoryPool;
use crate::system::cpu_features::{CpuFeatures, get_cpu_features};
use std::cmp;
use std::time::Instant;
use std::sync::Arc;
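/// Tuning knobs for cache-oblivious sorting.
///
/// The defaults probe the host at construction time: the cache hierarchy
/// comes from `detect_cache_hierarchy()` and the CPU features from
/// `get_cpu_features()`, so a default config adapts to the machine it runs on.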
#[derive(Debug, Clone)]
pub struct CacheObliviousConfig {
pub cache_hierarchy: CacheHierarchy,
pub use_simd: bool,
pub use_parallel: bool,
pub small_threshold: usize,
pub memory_pool: Option<Arc<SecureMemoryPool>>,
pub cpu_features: &'static CpuFeatures,
}
impl Default for CacheObliviousConfig {
fn default() -> Self {
Self {
cache_hierarchy: detect_cache_hierarchy(),
use_simd: cfg!(feature = "simd"),
use_parallel: true,
small_threshold: 1024,
memory_pool: None,
cpu_features: get_cpu_features(),
}
}
}
pub struct CacheObliviousSort {
config: CacheObliviousConfig,
stats: AlgorithmStats,
}
impl CacheObliviousSort {
pub fn new() -> Self {
Self::with_config(CacheObliviousConfig::default())
}
pub fn with_config(config: CacheObliviousConfig) -> Self {
Self {
config,
stats: AlgorithmStats {
items_processed: 0,
processing_time_us: 0,
memory_used: 0,
used_parallel: false,
used_simd: false,
},
}
}
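    /// Sorts `data` in place, picking a cache-aware, cache-oblivious, or
    /// hybrid strategy based on how the input's memory footprint compares to
    /// the detected cache hierarchy, and records size and timing statistics.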
pub fn sort<T: Clone + Ord>(&mut self, data: &mut [T]) -> Result<()> {
let start_time = Instant::now();
if data.is_empty() {
return Ok(());
}
let selector = AdaptiveAlgorithmSelector::new(&self.config);
let strategy = selector.select_strategy(data.len(), &self.config.cache_hierarchy);
match strategy {
CacheObliviousSortingStrategy::CacheOblivious => {
self.cache_oblivious_sort(data)?;
}
CacheObliviousSortingStrategy::CacheAware => {
self.cache_aware_sort(data)?;
}
CacheObliviousSortingStrategy::Hybrid => {
self.hybrid_sort(data)?;
}
}
self.stats.items_processed = data.len();
self.stats.processing_time_us = start_time.elapsed().as_micros() as u64;
        // The merge path allocates a temporary buffer the size of the input.
        self.stats.memory_used = data.len() * std::mem::size_of::<T>() * 2;
        // Heuristic reporting: these flags record that the prefetch-assisted
        // (SIMD) and size-based (parallel) criteria were met, not that a
        // vectorized or multi-threaded path actually executed.
        self.stats.used_simd = self.config.use_simd && self.config.cpu_features.has_avx2;
        self.stats.used_parallel = self.config.use_parallel && data.len() > 10000;
Ok(())
}
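    /// Cache-oblivious sort in the style of funnelsort: split the input into
    /// `k` chunks, recursively sort each with funnel width roughly `sqrt(k)`,
    /// then merge. Inputs at or below `small_threshold` fall back to insertion
    /// sort. This is a simplified variant; a full funnelsort would merge
    /// through a recursive k-funnel rather than the flat k-way merge below.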
pub fn cache_oblivious_sort<T: Clone + Ord>(&mut self, data: &mut [T]) -> Result<()> {
if data.len() <= self.config.small_threshold {
self.insertion_sort(data);
return Ok(());
}
let k = self.calculate_funnel_width(data.len());
self.funnel_sort_recursive(data, k)?;
Ok(())
}
fn funnel_sort_recursive<T: Clone + Ord>(&mut self, data: &mut [T], k: usize) -> Result<()> {
let n = data.len();
if n <= self.config.small_threshold {
self.insertion_sort(data);
return Ok(());
}
        // Clamp to at least 2: for k <= 3 the integer square root truncates
        // to 1, and a k = 1 call would recurse on the whole slice forever once
        // the slice stays above `small_threshold`.
        let sqrt_k = ((k as f64).sqrt() as usize).max(2);
let chunk_size = n / k;
for i in 0..k {
let start = i * chunk_size;
let end = if i == k - 1 { n } else { (i + 1) * chunk_size };
if start < end {
self.funnel_sort_recursive(&mut data[start..end], sqrt_k)?;
}
}
self.cache_oblivious_merge(data, k, chunk_size)?;
Ok(())
}
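    /// Flat k-way merge of `k` contiguous sorted chunks of width `chunk_size`
    /// into a temporary buffer, issuing software prefetches for upcoming
    /// segment data on AVX2-capable x86_64. The minimum is found by a linear
    /// scan over the live segments, so the merge costs O(n * k) comparisons;
    /// a tournament tree would bring that to O(n log k) for large `k`.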
fn cache_oblivious_merge<T: Clone + Ord>(&mut self, data: &mut [T], k: usize, chunk_size: usize) -> Result<()> {
if k <= 1 {
return Ok(());
}
let mut temp = data.to_vec();
let mut segments = Vec::new();
if self.config.cpu_features.has_avx2 && data.len() > 64 {
#[cfg(target_arch = "x86_64")]
{
for i in 0..k {
let start = i * chunk_size;
if start < data.len() {
unsafe {
let ptr = data.as_ptr().add(start) as *const i8;
std::arch::x86_64::_mm_prefetch(ptr, std::arch::x86_64::_MM_HINT_T0);
}
}
}
}
}
for i in 0..k {
let start = i * chunk_size;
let end = if i == k - 1 { data.len() } else { (i + 1) * chunk_size };
            if start < end {
                segments.push((start, end, start));
            }
        }
let mut output_pos = 0;
while !segments.is_empty() {
if self.config.cpu_features.has_avx2 && output_pos % 16 == 0 {
#[cfg(target_arch = "x86_64")]
{
for &(_, end, pos) in segments.iter() {
if pos + 16 < end {
unsafe {
let ptr = data.as_ptr().add(pos + 16) as *const i8;
std::arch::x86_64::_mm_prefetch(ptr, std::arch::x86_64::_MM_HINT_T1);
}
}
}
}
}
        let mut min_segment = 0;
        let mut min_value: Option<&T> = None;
        for (i, &(_start, end, pos)) in segments.iter().enumerate() {
            if pos < end {
                let current_value = &data[pos];
                // `map_or(true, ...)` makes the first live segment the initial minimum.
                if min_value.map_or(true, |mv| current_value < mv) {
                    min_value = Some(current_value);
                    min_segment = i;
                }
            }
        }
        if min_value.is_some() {
temp[output_pos] = data[segments[min_segment].2].clone();
output_pos += 1;
segments[min_segment].2 += 1;
if segments[min_segment].2 >= segments[min_segment].1 {
segments.remove(min_segment);
}
} else {
break;
}
}
self.cache_optimized_copy(&temp, data);
Ok(())
}
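    /// Copies `src` into `dst` in fixed-size batches, prefetching the batch
    /// after next on AVX2-capable x86_64; falls back to a plain
    /// `clone_from_slice` for small inputs or other targets.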
    fn cache_optimized_copy<T: Clone>(&self, src: &[T], dst: &mut [T]) {
        if src.len() != dst.len() {
            // `clone_from_slice` panics on mismatched lengths; copy the
            // common prefix instead.
            let n = cmp::min(src.len(), dst.len());
            dst[..n].clone_from_slice(&src[..n]);
            return;
        }
if src.len() > 256 && self.config.cpu_features.has_avx2 {
#[cfg(target_arch = "x86_64")]
{
            // Copy in fixed 64-element batches, prefetching the batch after next.
            let chunk_size = 64;
            for i in (0..src.len()).step_by(chunk_size) {
                let end = cmp::min(i + chunk_size, src.len());
if end + chunk_size < src.len() {
unsafe {
let src_ptr = src.as_ptr().add(end + chunk_size) as *const i8;
let dst_ptr = dst.as_mut_ptr().add(end + chunk_size) as *const i8;
std::arch::x86_64::_mm_prefetch(src_ptr, std::arch::x86_64::_MM_HINT_T0);
std::arch::x86_64::_mm_prefetch(dst_ptr, std::arch::x86_64::_MM_HINT_T0);
}
}
dst[i..end].clone_from_slice(&src[i..end]);
}
}
} else {
dst.clone_from_slice(src);
}
}
    fn cache_aware_sort<T: Clone + Ord>(&mut self, data: &mut [T]) -> Result<()> {
        let data_bytes = data.len() * std::mem::size_of::<T>();
        if data_bytes <= self.config.cache_hierarchy.l1_size {
            self.l1_optimized_sort(data);
        } else if data_bytes <= self.config.cache_hierarchy.l2_size {
            self.l2_optimized_sort(data);
        } else {
            self.l3_optimized_sort(data);
        }
Ok(())
}
fn hybrid_sort<T: Clone + Ord>(&mut self, data: &mut [T]) -> Result<()> {
let data_size = data.len() * std::mem::size_of::<T>();
if data_size <= self.config.cache_hierarchy.l2_size {
self.cache_aware_sort(data)
} else {
self.cache_oblivious_sort(data)
}
}
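    /// Chooses the funnel width `k`. In the cache-oblivious model the natural
    /// choice is k ~ sqrt(M / B) for cache size M and line size B, so that k
    /// merge frontiers plus the output stream fit in cache together; the
    /// result is clamped to [2, min(n, 64)].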
fn calculate_funnel_width(&self, n: usize) -> usize {
let cache_size = self.config.cache_hierarchy.l2_size;
let block_size = self.config.cache_hierarchy.l2_line_size;
let k = ((cache_size / block_size) as f64).sqrt() as usize;
        cmp::max(k, 2).min(cmp::min(n, 64))
    }
fn insertion_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
for i in 1..data.len() {
let key = data[i].clone();
let mut j = i;
while j > 0 && data[j - 1] > key {
data[j] = data[j - 1].clone();
j -= 1;
}
data[j] = key;
}
}
    fn l1_optimized_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
        // Both feature tiers currently share the same prefetch-assisted path.
        let simd_eligible = self.config.use_simd
            && ((self.config.cpu_features.has_avx2 && data.len() >= 16)
                || (self.config.cpu_features.has_sse42 && data.len() >= 8));
        if simd_eligible {
            self.simd_insertion_sort(data);
        } else {
            self.insertion_sort(data);
        }
    }
fn l2_optimized_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
if data.len() > 16 {
self.cache_aware_quicksort(data, 0, data.len() - 1);
} else {
self.insertion_sort(data);
}
}
fn l3_optimized_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
if data.len() > 32 {
self.cache_aware_mergesort(data);
} else {
self.insertion_sort(data);
}
}
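    /// Despite the name, this does not vectorize the comparisons: on
    /// AVX2-capable x86_64 it issues software prefetches across the input and
    /// then runs the prefetch-assisted insertion sort below.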
fn simd_insertion_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
if data.len() > 64 {
#[cfg(target_arch = "x86_64")]
{
if self.config.cpu_features.has_avx2 {
unsafe {
for i in (0..data.len()).step_by(8) {
let ptr = data.as_ptr().add(i) as *const i8;
std::arch::x86_64::_mm_prefetch(ptr, std::arch::x86_64::_MM_HINT_T0);
}
}
}
}
}
self.cache_aware_insertion_sort(data);
}
fn cache_aware_insertion_sort<T: Clone + Ord>(&mut self, data: &mut [T]) {
for i in 1..data.len() {
let key = data[i].clone();
let mut j = i;
if i + 8 < data.len() && self.config.cpu_features.has_avx2 {
#[cfg(target_arch = "x86_64")]
unsafe {
let ptr = data.as_ptr().add(i + 8) as *const i8;
std::arch::x86_64::_mm_prefetch(ptr, std::arch::x86_64::_MM_HINT_T1);
}
}
while j > 0 && data[j - 1] > key {
data[j] = data[j - 1].clone();
j -= 1;
}
data[j] = key;
}
}
fn cache_aware_quicksort<T: Clone + Ord>(&mut self, data: &mut [T], low: usize, high: usize) {
if low < high {
let pivot = self.cache_aware_partition(data, low, high);
if pivot > 0 {
self.cache_aware_quicksort(data, low, pivot - 1);
}
if pivot < high {
self.cache_aware_quicksort(data, pivot + 1, high);
}
}
}
fn cache_aware_partition<T: Clone + Ord>(&mut self, data: &mut [T], low: usize, high: usize) -> usize {
let pivot = data[high].clone();
let mut i = low;
for j in low..high {
if data[j] <= pivot {
data.swap(i, j);
i += 1;
}
}
data.swap(i, high);
i
}
fn cache_aware_mergesort<T: Clone + Ord>(&mut self, data: &mut [T]) {
let len = data.len();
if len <= 1 {
return;
}
let mid = len / 2;
let mut left = data[..mid].to_vec();
let mut right = data[mid..].to_vec();
self.cache_aware_mergesort(&mut left);
self.cache_aware_mergesort(&mut right);
self.cache_aware_merge(data, &left, &right);
}
fn cache_aware_merge<T: Clone + Ord>(&mut self, data: &mut [T], left: &[T], right: &[T]) {
let mut i = 0;
let mut j = 0;
let mut k = 0;
while i < left.len() && j < right.len() {
if left[i] <= right[j] {
data[k] = left[i].clone();
i += 1;
} else {
data[k] = right[j].clone();
j += 1;
}
k += 1;
}
while i < left.len() {
data[k] = left[i].clone();
i += 1;
k += 1;
}
while j < right.len() {
data[k] = right[j].clone();
j += 1;
k += 1;
}
}
}
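/// Picks a sorting strategy from the input's estimated memory footprint
/// relative to the detected cache hierarchy.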
pub struct AdaptiveAlgorithmSelector {
cache_hierarchy: CacheHierarchy,
cpu_features: &'static CpuFeatures,
}
impl AdaptiveAlgorithmSelector {
pub fn new(config: &CacheObliviousConfig) -> Self {
Self {
cache_hierarchy: config.cache_hierarchy.clone(),
cpu_features: config.cpu_features,
}
}
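    /// Maps an element count to a strategy: cache-aware inside L1,
    /// cache-oblivious up to L3, hybrid beyond L3.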
pub fn select_strategy(&self, data_size: usize, cache_hierarchy: &CacheHierarchy) -> CacheObliviousSortingStrategy {
        // `data_size` is an element count; assume 8-byte elements to estimate bytes.
        let element_size = std::mem::size_of::<u64>();
        let data_bytes = data_size * element_size;
if data_bytes <= cache_hierarchy.l1_size {
CacheObliviousSortingStrategy::CacheAware
} else if data_bytes <= cache_hierarchy.l3_size {
CacheObliviousSortingStrategy::CacheOblivious
} else {
CacheObliviousSortingStrategy::Hybrid
}
}
    pub fn analyze_data<T>(&self, data: &[T]) -> DataCharacteristics {
        let memory_footprint = data.len() * std::mem::size_of::<T>();
        DataCharacteristics {
            size: data.len(),
            memory_footprint,
            fits_in_l1: memory_footprint <= self.cache_hierarchy.l1_size,
            fits_in_l2: memory_footprint <= self.cache_hierarchy.l2_size,
            fits_in_l3: memory_footprint <= self.cache_hierarchy.l3_size,
        }
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CacheObliviousSortingStrategy {
CacheAware,
CacheOblivious,
Hybrid,
}
#[derive(Debug, Clone)]
pub struct DataCharacteristics {
pub size: usize,
pub memory_footprint: usize,
pub fits_in_l1: bool,
pub fits_in_l2: bool,
pub fits_in_l3: bool,
}
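/// Array wrapper intended for a van Emde Boas (vEB) memory layout, in which a
/// tree of height h is stored as a half-height top tree followed by its bottom
/// subtrees so that any root-to-leaf path touches O(log_B n) cache lines.
///
/// Note: `cache_optimal_index` currently returns the identity mapping, so the
/// layout is a placeholder; `_veb_layout_recursive` sketches the real index
/// computation but is not yet wired in. `get` additionally prefetches the next
/// cache line's worth of elements on AVX2-capable x86_64.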
pub struct VanEmdeBoas<T> {
data: Vec<T>,
height: usize,
cache_hierarchy: CacheHierarchy,
cpu_features: &'static CpuFeatures,
cache_line_size: usize,
}
impl<T: Clone> VanEmdeBoas<T> {
    pub fn new(data: Vec<T>, cache_hierarchy: CacheHierarchy) -> Self {
        Self::with_cpu_features(data, cache_hierarchy, get_cpu_features())
    }
pub fn with_cpu_features(data: Vec<T>, cache_hierarchy: CacheHierarchy, cpu_features: &'static CpuFeatures) -> Self {
let height = (data.len() as f64).log2().ceil() as usize;
let cache_line_size = cache_hierarchy.l1_line_size;
Self {
data,
height,
cache_hierarchy,
cpu_features,
cache_line_size,
}
}
pub fn get(&self, index: usize) -> Option<&T> {
if index < self.data.len() {
let physical_index = self.cache_optimal_index(index);
if self.cpu_features.has_avx2 && self.data.len() > 64 {
#[cfg(target_arch = "x86_64")]
{
                    // `.max(1)` guards the division for zero-sized types.
                    let cache_line_elements = self.cache_line_size / std::mem::size_of::<T>().max(1);
if physical_index + cache_line_elements < self.data.len() {
unsafe {
let ptr = self.data.as_ptr().add(physical_index + cache_line_elements) as *const i8;
std::arch::x86_64::_mm_prefetch(ptr, std::arch::x86_64::_MM_HINT_T1);
}
}
}
}
Some(&self.data[physical_index])
} else {
None
}
}
fn cache_optimal_index(&self, logical_index: usize) -> usize {
if logical_index < self.data.len() {
logical_index
} else {
self.data.len() - 1
}
}
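    /// Sketch of the recursive vEB index computation (currently unused): split
    /// the height in half, recurse into the top tree for cluster 0 and into
    /// the matching bottom tree otherwise.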
fn _veb_layout_recursive(&self, index: usize, offset: usize, size: usize, height: usize) -> usize {
if height <= 1 || size <= 1 || index >= size {
            return (offset + index).min(self.data.len().saturating_sub(1));
}
let half_height = height / 2;
let cluster_size = 1 << half_height;
let cluster = index / cluster_size;
let position = index % cluster_size;
if offset + cluster * cluster_size >= self.data.len() {
            return (offset + index).min(self.data.len().saturating_sub(1));
}
let top_offset = offset;
let bottom_offset = (offset + (size / cluster_size)).min(self.data.len());
if cluster == 0 {
self._veb_layout_recursive(position, top_offset, cluster_size, half_height)
} else {
            let new_offset = (bottom_offset + cluster * cluster_size).min(self.data.len().saturating_sub(1));
self._veb_layout_recursive(position, new_offset, cluster_size, half_height)
}
}
}
impl Algorithm for CacheObliviousSort {
type Config = CacheObliviousConfig;
    type Input = Vec<i32>;
    type Output = Vec<i32>;
fn execute(&self, config: &Self::Config, mut input: Self::Input) -> Result<Self::Output> {
let mut sorter = Self::with_config(config.clone());
sorter.sort(&mut input)?;
Ok(input)
}
fn stats(&self) -> AlgorithmStats {
self.stats.clone()
}
    fn estimate_memory(&self, input_size: usize) -> usize {
        // Input buffer plus the equally sized temporary buffer used by the merge.
        input_size * std::mem::size_of::<i32>() * 2
    }
fn supports_parallel(&self) -> bool {
true
}
fn supports_simd(&self) -> bool {
true
}
}
impl Default for CacheObliviousSort {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cache_oblivious_sort_basic() {
let mut sorter = CacheObliviousSort::new();
let mut data = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5];
assert!(sorter.sort(&mut data).is_ok());
assert_eq!(data, vec![1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 9]);
}
#[test]
fn test_cache_oblivious_sort_empty() {
let mut sorter = CacheObliviousSort::new();
let mut data: Vec<i32> = vec![];
assert!(sorter.sort(&mut data).is_ok());
assert!(data.is_empty());
}
#[test]
fn test_cache_oblivious_sort_single_element() {
let mut sorter = CacheObliviousSort::new();
let mut data = vec![42];
assert!(sorter.sort(&mut data).is_ok());
assert_eq!(data, vec![42]);
}
#[test]
fn test_cache_oblivious_sort_large() {
let mut sorter = CacheObliviousSort::new();
let mut data: Vec<i32> = (0..10000).rev().collect();
let expected: Vec<i32> = (0..10000).collect();
assert!(sorter.sort(&mut data).is_ok());
assert_eq!(data, expected);
}
#[test]
fn test_adaptive_algorithm_selector() {
let config = CacheObliviousConfig::default();
let selector = AdaptiveAlgorithmSelector::new(&config);
let strategy = selector.select_strategy(100, &config.cache_hierarchy);
assert_eq!(strategy, CacheObliviousSortingStrategy::CacheAware);
        // l3_size is in bytes; divide by the assumed 8-byte element size so
        // the element count lands just past the L3 boundary.
        let large_size = config.cache_hierarchy.l3_size / 8 + 1000;
        let strategy = selector.select_strategy(large_size, &config.cache_hierarchy);
assert_eq!(strategy, CacheObliviousSortingStrategy::Hybrid);
}
#[test]
fn test_data_characteristics_analysis() {
let config = CacheObliviousConfig::default();
let selector = AdaptiveAlgorithmSelector::new(&config);
let data = vec![1, 2, 3, 4, 5];
let characteristics = selector.analyze_data(&data);
assert_eq!(characteristics.size, 5);
assert_eq!(characteristics.memory_footprint, 5 * std::mem::size_of::<i32>());
assert!(characteristics.fits_in_l1);
}
#[test]
fn test_van_emde_boas_layout() {
let cache_hierarchy = CacheHierarchy::default();
let data = vec![1, 2, 3, 4, 5, 6, 7, 8];
let veb = VanEmdeBoas::new(data, cache_hierarchy);
assert_eq!(veb.get(0), Some(&1));
assert_eq!(veb.get(7), Some(&8));
assert_eq!(veb.get(8), None);
}
#[test]
fn test_funnel_width_calculation() {
let sorter = CacheObliviousSort::new();
let width = sorter.calculate_funnel_width(1000);
assert!(width >= 2 && width <= 64);
}
#[test]
fn test_insertion_sort() {
let mut sorter = CacheObliviousSort::new();
let mut data = vec![5, 2, 8, 1, 9];
sorter.insertion_sort(&mut data);
assert_eq!(data, vec![1, 2, 5, 8, 9]);
}
#[test]
fn test_algorithm_trait_implementation() {
let sorter = CacheObliviousSort::new();
let config = CacheObliviousConfig::default();
let input = vec![3, 1, 4, 1, 5];
let result = sorter.execute(&config, input);
assert!(result.is_ok());
assert_eq!(result.unwrap(), vec![1, 1, 3, 4, 5]);
assert!(sorter.supports_parallel());
assert!(sorter.supports_simd());
assert!(sorter.estimate_memory(1000) > 0);
}
#[test]
fn test_cache_oblivious_config_default() {
let config = CacheObliviousConfig::default();
assert!(config.cache_hierarchy.l1_size > 0);
assert!(config.cache_hierarchy.l2_size > 0);
assert!(config.cache_hierarchy.l3_size > 0);
assert_eq!(config.small_threshold, 1024);
}
#[test]
fn test_sorting_strategies() {
let strategies = [
CacheObliviousSortingStrategy::CacheAware,
CacheObliviousSortingStrategy::CacheOblivious,
CacheObliviousSortingStrategy::Hybrid,
];
for strategy in &strategies {
assert_ne!(format!("{:?}", strategy), "");
}
}
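    // A regression-style check for the funnel recursion: a deterministic
    // permutation larger than `small_threshold` (1024) forces the recursive
    // funnel path rather than the insertion-sort base case. The multiplier and
    // modulus are arbitrary coprime constants chosen for this test.
    #[test]
    fn test_cache_oblivious_sort_permutation_above_threshold() {
        let mut sorter = CacheObliviousSort::new();
        // (i * 7919) % 5000 is a permutation of 0..5000 because 7919 is prime
        // and coprime with 5000.
        let mut data: Vec<i32> = (0..5000).map(|i| (i * 7919) % 5000).collect();
        assert!(sorter.sort(&mut data).is_ok());
        assert_eq!(data, (0..5000).collect::<Vec<i32>>());
    }
    // Exercises the mismatched-length path of `cache_optimized_copy`, which
    // copies only the common prefix instead of panicking.
    #[test]
    fn test_cache_optimized_copy_length_mismatch() {
        let sorter = CacheObliviousSort::new();
        let src = vec![1, 2, 3, 4, 5];
        let mut dst = vec![0; 3];
        sorter.cache_optimized_copy(&src, &mut dst);
        assert_eq!(dst, vec![1, 2, 3]);
    }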
}