simd_lookup/
lib.rs

1//! # simd-lookup
2//!
3//! High-performance SIMD utilities for fast table lookups, compression, and data processing.
4//!
5//! ## Features
6//!
7//! - **Cross-platform SIMD**: Automatic dispatch to optimal implementation (AVX-512, AVX2, NEON)
8//! - **Zero-cost abstractions**: Thin wrappers over platform intrinsics via the [`wide`] crate
9//! - **ARM NEON optimized**: Compress operations achieve up to 12 Gelem/s on Apple Silicon
10//!
11//! ## Core Modules
12//!
13//! - [`simd_compress`] — Stream compaction (VCOMPRESS): pack selected elements by bitmask
14//! - [`simd_gather`] — Parallel memory gather with SIMD indices
15//! - [`small_table`] — 64-byte lookup tables using ARM TBL4 / AVX-512 VPERMB
16//! - [`wide_utils`] — Shuffle, widen, split, and bitmask utilities for `wide` types
17//! - [`prefetch`] — Cross-platform memory prefetch (L1/L2/L3 hints)
18//!
19//!
20//! ## Quick Example
21//!
22//! ```rust
23//! use simd_lookup::{compress_store_u32x8};
24//! use wide::u32x8;
25//!
26//! // Compress: select elements where mask bits are set
27//! let data = u32x8::from([10, 20, 30, 40, 50, 60, 70, 80]);
28//! let mask = 0b10110010u8; // Select positions 1, 4, 5, 7
29//! let mut output = [0u32; 8];
30//!
31//! let count = compress_store_u32x8(data, mask, &mut output);
32//! assert_eq!(count, 4);
33//! assert_eq!(&output[..count], &[20, 50, 60, 80]);
34//! ```
35//!
36//! ## Platform Support
37//!
38//! | Platform | Optimization Level |
39//! |----------|-------------------|
40//! | ARM aarch64 (Apple Silicon) | Full NEON optimization |
41//! | x86-64 AVX-512 (Ice Lake+) | Native compress/gather |
42//! | x86-64 AVX2 | Shuffle-based fallbacks |
43//! | Other | Scalar fallbacks |
44
45pub mod bulk_vec_extender;
46pub mod eight_value_lookup;
47pub mod entropy_map_lookup;
48pub mod lookup_kernel;
49pub mod prefetch;
50pub mod simd_compress;
51pub mod simd_gather;
52pub mod small_table;
53pub mod wide_utils;
54
55// Re-export the main types for convenience
56pub use eight_value_lookup::EightValueLookup;
57pub use entropy_map_lookup::{EntropyMapBitpackedLookup, EntropyMapLookup};
58pub use lookup_kernel::{
59    PipelinedSingleTableU32U8Lookup, SimdCascadingTableU32U8Lookup, SimdDualTableWithHashLookup,
60};
61pub use simd_compress::{
62    compress_store_u32x8, compress_store_u32x16, compress_store_u8x16,
63    compress_u32x8, compress_u32x16, compress_u8x16,
64};
65pub use wide_utils::{
66    FromBitmask, SimdSplit, WideUtilsExt,
67};
68pub use simd_gather::{
69    gather_u32index_u8, gather_masked_u32index_u8,
70    gather_u32index_u32, gather_masked_u32index_u32,
71};
72
#[cfg(test)]
mod tests {
    /// Prints, for each x86 SIMD target feature of interest, whether it was
    /// enabled for this compilation.
    ///
    /// The test makes no assertions; it only reports build-time configuration
    /// on stdout. `cfg!` is resolved at compile time, so the report reflects
    /// the flags the crate was built with, not what the host CPU supports at
    /// runtime.
    #[test]
    fn test_cpu_features() {
        // One row per feature: (enabled at compile time, enabled message, disabled message).
        // Rows are listed in the order they are printed.
        let feature_report: [(bool, &str, &str); 5] = [
            (
                cfg!(target_feature = "avx512f"),
                "✓ AVX-512 Foundation (AVX512F): ENABLED",
                "✗ AVX-512 Foundation (AVX512F): DISABLED",
            ),
            (
                cfg!(target_feature = "avx512bw"),
                "✓ AVX-512 Byte and Word (AVX512BW): ENABLED",
                "✗ AVX-512 Byte and Word (AVX512BW): DISABLED",
            ),
            (
                cfg!(target_feature = "avx512vl"),
                "✓ AVX-512 Vector Length (AVX512VL): ENABLED",
                "✗ AVX-512 Vector Length (AVX512VL): DISABLED",
            ),
            (
                cfg!(target_feature = "avx2"),
                "✓ AVX2: ENABLED",
                "✗ AVX2: DISABLED",
            ),
            (
                cfg!(target_feature = "avx"),
                "✓ AVX: ENABLED",
                "✗ AVX: DISABLED",
            ),
        ];

        for &(is_enabled, enabled_msg, disabled_msg) in feature_report.iter() {
            let line = if is_enabled { enabled_msg } else { disabled_msg };
            println!("{}", line);
        }
    }
}