pub mod bit_ops;
pub mod cache_oblivious;
pub mod external_sort;
pub mod multiway_merge;
pub mod radix_sort;
pub mod set_operations;
pub mod set_ops;
pub mod simd_merge;
pub mod simd_search;
pub mod simd_set_intersect;
pub mod simd_set_union;
pub mod suffix_array;
pub mod tournament_tree;
pub use bit_ops::{popcount_slice, select_in_word, has_fast_bmi2};
pub use cache_oblivious::{
AdaptiveAlgorithmSelector, CacheObliviousConfig, CacheObliviousSort,
DataCharacteristics as CacheObliviousDataCharacteristics,
CacheObliviousSortingStrategy, VanEmdeBoas
};
pub use external_sort::{ExternalSort, ReplaceSelectSort, ReplaceSelectSortConfig};
pub use multiway_merge::{MergeSource, MultiWayMerge};
pub use radix_sort::{
AdvancedRadixSort, AdvancedRadixSortConfig, AdvancedStringRadixSort, AdvancedU32RadixSort,
AdvancedU64RadixSort, CpuFeatures, DataCharacteristics, RadixSort, RadixSortConfig,
RadixSortable, SortingStrategy as RadixSortingStrategy
};
pub use set_operations::{SetOperations, SetOperationsConfig, SetOperationStats};
pub use set_ops::{
multiset_intersection, multiset_1small_intersection, multiset_fast_intersection,
multiset_intersection2, multiset_1small_intersection2, multiset_fast_intersection2,
set_unique, set_unique_default,
multiset_union, multiset_difference,
set_intersection, set_union, set_difference
};
pub use simd_merge::{SimdComparator, SimdConfig, SimdOperations};
pub use simd_search::{simd_gallop_to, simd_block_filter};
pub use simd_set_intersect::{
sorted_intersect_adaptive, sorted_intersect_simd, sorted_intersect_count,
sorted_intersect_galloping,
};
pub use simd_set_union::{
sorted_union_adaptive, sorted_union_simd, sorted_union_count,
};
pub use suffix_array::{LcpArray, SuffixArray, SuffixArrayBuilder};
pub use tournament_tree::{EnhancedLoserTree, LoserTreeConfig, TournamentNode, CacheAlignedNode};
#[derive(Debug, Clone)]
pub struct AlgorithmConfig {
pub use_simd: bool,
pub use_parallel: bool,
pub parallel_threshold: usize,
pub memory_budget: usize,
}
impl Default for AlgorithmConfig {
fn default() -> Self {
Self {
use_simd: cfg!(feature = "simd"),
use_parallel: true,
parallel_threshold: 10_000,
memory_budget: 64 * 1024 * 1024, }
}
}
#[derive(Debug, Clone)]
pub struct AlgorithmStats {
pub items_processed: usize,
pub processing_time_us: u64,
pub memory_used: usize,
pub used_parallel: bool,
pub used_simd: bool,
}
impl AlgorithmStats {
pub fn items_per_second(&self) -> f64 {
if self.processing_time_us == 0 {
return 0.0;
}
(self.items_processed as f64) / (self.processing_time_us as f64 / 1_000_000.0)
}
pub fn items_per_byte(&self) -> f64 {
if self.memory_used == 0 {
return 0.0;
}
self.items_processed as f64 / self.memory_used as f64
}
}
pub trait Algorithm {
type Config;
type Input;
type Output;
fn execute(&self, config: &Self::Config, input: Self::Input) -> crate::Result<Self::Output>;
fn stats(&self) -> AlgorithmStats;
fn estimate_memory(&self, input_size: usize) -> usize;
fn supports_parallel(&self) -> bool {
false
}
fn supports_simd(&self) -> bool {
false
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_algorithm_config_default() {
let config = AlgorithmConfig::default();
assert_eq!(config.parallel_threshold, 10_000);
assert_eq!(config.memory_budget, 64 * 1024 * 1024);
#[cfg(feature = "simd")]
assert!(config.use_simd);
#[cfg(not(feature = "simd"))]
assert!(!config.use_simd);
}
#[test]
fn test_algorithm_stats() {
let stats = AlgorithmStats {
items_processed: 1000,
processing_time_us: 1000, memory_used: 1024,
used_parallel: false,
used_simd: false,
};
assert_eq!(stats.items_per_second(), 1_000_000.0); assert_eq!(stats.items_per_byte(), 1000.0 / 1024.0);
}
#[test]
fn test_algorithm_stats_edge_cases() {
let stats = AlgorithmStats {
items_processed: 1000,
processing_time_us: 0,
memory_used: 0,
used_parallel: false,
used_simd: false,
};
assert_eq!(stats.items_per_second(), 0.0);
assert_eq!(stats.items_per_byte(), 0.0);
}
}