#[cfg(feature = "no-std")]
use core::{hint, mem::size_of};
#[cfg(not(feature = "no-std"))]
use std::{hint, mem::size_of};
/// Namespace type for small, always-inlined helpers that give the optimiser
/// (or, for the prefetch functions, the CPU) extra information.
///
/// Every helper is a *safe* function: a claim that turns out to be false
/// results in a panic, never in undefined behaviour.
pub struct OptimizationHints;

impl OptimizationHints {
    /// Empty `#[cold]` function. Calling it inside a branch tells the
    /// optimiser that the branch is rarely taken.
    #[cold]
    #[inline]
    fn cold_path() {}

    /// Returns `b` unchanged while hinting that `b` is usually `true`.
    ///
    /// The hint is best effort: the `false` path calls a `#[cold]` function,
    /// which the optimiser may use for block placement or may ignore.
    #[inline(always)]
    pub fn likely(b: bool) -> bool {
        if !b {
            Self::cold_path();
        }
        b
    }

    /// Returns `b` unchanged while hinting that `b` is usually `false`.
    #[inline(always)]
    pub fn unlikely(b: bool) -> bool {
        if b {
            Self::cold_path();
        }
        b
    }

    /// Returns `ptr` unchanged.
    ///
    /// No alignment fact is passed to the compiler: a safe function cannot
    /// promise the alignment of a caller-supplied raw pointer, and `ptr` may
    /// be null or dangling, so it is never dereferenced or offset here.
    /// `align` is accepted for API compatibility only.
    #[inline(always)]
    pub fn assume_aligned<T>(ptr: *const T, align: usize) -> *const T {
        let _ = align;
        ptr
    }

    /// Mutable counterpart of [`OptimizationHints::assume_aligned`]; returns
    /// `ptr` unchanged and ignores `align`.
    #[inline(always)]
    pub fn assume_aligned_mut<T>(ptr: *mut T, align: usize) -> *mut T {
        let _ = align;
        ptr
    }

    /// Returns `value` after checking that `min <= value <= max`.
    ///
    /// After the check the optimiser may rely on the range.
    ///
    /// # Panics
    ///
    /// Panics if `value` lies outside `min..=max` or is not comparable with
    /// the bounds (for example a floating-point NaN).
    #[inline(always)]
    #[track_caller]
    pub fn assume_range<T: PartialOrd + Copy>(value: T, min: T, max: T) -> T {
        assert!(
            value >= min && value <= max,
            "assume_range: value is outside the asserted range"
        );
        value
    }

    /// Returns `slice` after checking that it holds exactly `len` elements.
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != len`.
    #[inline(always)]
    #[track_caller]
    pub fn assume_len<T>(slice: &[T], len: usize) -> &[T] {
        assert_eq!(
            slice.len(),
            len,
            "assume_len: slice length differs from the asserted length"
        );
        slice
    }

    /// Mutable counterpart of [`OptimizationHints::assume_len`].
    ///
    /// # Panics
    ///
    /// Panics if `slice.len() != len`.
    #[inline(always)]
    #[track_caller]
    pub fn assume_len_mut<T>(slice: &mut [T], len: usize) -> &mut [T] {
        assert_eq!(
            slice.len(),
            len,
            "assume_len_mut: slice length differs from the asserted length"
        );
        slice
    }

    /// Returns `count` unchanged (identity function kept as a hook for
    /// loop-count hints).
    #[inline(always)]
    pub fn assume_loop_count(count: usize) -> usize {
        count
    }

    /// Requests a prefetch of the cache line at `_ptr` into all cache levels
    /// (`_MM_HINT_T0`). Does nothing on targets other than x86_64.
    #[inline(always)]
    pub fn prefetch_read<T>(_ptr: *const T) {
        #[cfg(target_arch = "x86_64")]
        {
            // SAFETY: a prefetch is only a hint; it does not access memory
            // architecturally, so any pointer value is acceptable. SSE is
            // part of the x86_64 baseline.
            unsafe {
                core::arch::x86_64::_mm_prefetch::<{ core::arch::x86_64::_MM_HINT_T0 }>(
                    _ptr.cast::<i8>(),
                )
            };
        }
    }

    /// Requests a prefetch of the cache line at `_ptr` with intent to write
    /// (`_MM_HINT_ET0`). Does nothing on targets other than x86_64.
    #[inline(always)]
    pub fn prefetch_write<T>(_ptr: *const T) {
        #[cfg(target_arch = "x86_64")]
        {
            // SAFETY: see `prefetch_read`; a prefetch never faults.
            unsafe {
                core::arch::x86_64::_mm_prefetch::<{ core::arch::x86_64::_MM_HINT_ET0 }>(
                    _ptr.cast::<i8>(),
                )
            };
        }
    }

    /// Requests a non-temporal prefetch of the cache line at `_ptr`
    /// (`_MM_HINT_NTA`). Does nothing on targets other than x86_64.
    #[inline(always)]
    pub fn prefetch_nta<T>(_ptr: *const T) {
        #[cfg(target_arch = "x86_64")]
        {
            // SAFETY: see `prefetch_read`; a prefetch never faults.
            unsafe {
                core::arch::x86_64::_mm_prefetch::<{ core::arch::x86_64::_MM_HINT_NTA }>(
                    _ptr.cast::<i8>(),
                )
            };
        }
    }

    /// Reports whether two `len`-element regions starting at `ptr1` and
    /// `ptr2` are disjoint.
    ///
    /// Only the addresses are compared; the pointers are never dereferenced.
    /// Returns `true` when the byte ranges do not overlap (always the case
    /// for `len == 0` or zero-sized `T`). Sizes that would overflow `usize`
    /// saturate, so an impossibly large `len` yields the conservative answer
    /// `false` instead of panicking or wrapping around.
    #[inline(always)]
    pub fn assume_noalias<T>(ptr1: *const T, ptr2: *const T, len: usize) -> bool {
        let bytes = len.saturating_mul(size_of::<T>());
        let (start1, start2) = (ptr1 as usize, ptr2 as usize);
        let end1 = start1.saturating_add(bytes);
        let end2 = start2.saturating_add(bytes);
        // Half-open ranges are disjoint when one ends before the other begins.
        end1 <= start2 || end2 <= start1
    }

    /// Suggested number of `T` elements to process per SIMD step.
    ///
    /// Returns 64, 32, 16 or 8 for element sizes of 1, 2, 4 or 8 bytes and 4
    /// for every other size. The first four values are the lane counts of a
    /// 64-byte vector (presumably chosen with 512-bit registers in mind).
    #[inline(always)]
    pub fn optimal_simd_width<T>() -> usize {
        match size_of::<T>() {
            1 => 64,
            2 => 32,
            4 => 16,
            8 => 8,
            _ => 4,
        }
    }
}
/// Forwards `hint(args...)` to the associated function of the same name on
/// `$crate::optimization_hints::OptimizationHints`.
///
/// Any associated function can be named, so the forms `likely`, `unlikely`,
/// `assume_aligned`, `assume_len`, `prefetch_read` and `prefetch_write` work
/// as before, and the remaining helpers (for example `assume_len_mut` or
/// `prefetch_nta`) are reachable too. A trailing comma after the last
/// argument is accepted. An unknown hint name is reported by the compiler as
/// a missing associated function.
///
/// ```ignore
/// let hot = optimize_for_simd!(likely(n > 0));
/// optimize_for_simd!(prefetch_read(data.as_ptr()));
/// ```
#[macro_export]
macro_rules! optimize_for_simd {
    ($hint:ident($($arg:expr),* $(,)?)) => {
        $crate::optimization_hints::OptimizationHints::$hint($($arg),*)
    };
}
/// Textual spellings of attribute names, exposed as string constants.
///
/// Nothing in this module applies an attribute; each constant is only the
/// text that would appear between `#[` and `]`.
pub mod attributes {
    // --- Inlining control ---------------------------------------------------

    /// Text of the attribute that requests unconditional inlining.
    pub const FORCE_INLINE: &str = "inline(always)";
    /// Text of the attribute that forbids inlining.
    pub const NEVER_INLINE: &str = "inline(never)";

    // --- Code temperature ---------------------------------------------------

    /// Text of the attribute marking a function as rarely executed.
    pub const COLD: &str = "cold";
    /// NOTE(review): Rust has a built-in `#[cold]` but no built-in `#[hot]`
    /// attribute; confirm what consumes this string.
    pub const HOT: &str = "hot";

    // --- Code generation and linkage ----------------------------------------

    /// Name of the attribute that enables target features; the feature list
    /// itself is not part of this string.
    pub const TARGET_FEATURE: &str = "target_feature";
    /// Text of the attribute that disables symbol name mangling.
    pub const NO_MANGLE: &str = "no_mangle";

    // --- Data layout ----------------------------------------------------------

    /// Text of the attribute selecting the C-compatible layout.
    pub const REPR_C: &str = "repr(C)";
    /// NOTE(review): the real attribute is spelled `repr(align(N))`; this
    /// string carries no alignment value. Confirm how it is used.
    pub const REPR_ALIGN: &str = "repr(align)";
}
/// Slice-level convenience wrappers built on [`OptimizationHints`].
pub mod simd_hints {
    use super::OptimizationHints;

    /// Byte alignment associated with the widest SIMD extension enabled at
    /// compile time: 64 with `avx512f`, 32 with `avx2`, otherwise 16.
    #[inline(always)]
    fn simd_alignment() -> usize {
        match (
            cfg!(target_feature = "avx512f"),
            cfg!(target_feature = "avx2"),
        ) {
            (true, _) => 64,
            (false, true) => 32,
            (false, false) => 16,
        }
    }

    /// Routes the slice's data pointer through
    /// [`OptimizationHints::assume_aligned`] and returns a slice over the
    /// same elements.
    #[inline(always)]
    pub fn assume_simd_aligned<T>(slice: &[T]) -> &[T] {
        let count = slice.len();
        let data = OptimizationHints::assume_aligned(slice.as_ptr(), simd_alignment());
        // SAFETY: `assume_aligned` hands back the address it was given, so
        // `data`/`count` describe exactly the memory borrowed by `slice`,
        // and the elided lifetime ties the result to that borrow.
        unsafe { core::slice::from_raw_parts(data, count) }
    }

    /// Mutable counterpart of [`assume_simd_aligned`].
    #[inline(always)]
    pub fn assume_simd_aligned_mut<T>(slice: &mut [T]) -> &mut [T] {
        let count = slice.len();
        let data = OptimizationHints::assume_aligned_mut(slice.as_mut_ptr(), simd_alignment());
        // SAFETY: `assume_aligned_mut` hands back the address it was given,
        // so `data`/`count` describe exactly the memory exclusively borrowed
        // by `slice`; the input borrow is not used again.
        unsafe { core::slice::from_raw_parts_mut(data, count) }
    }

    /// Calls `f` once for every element of `slice`, in order.
    ///
    /// The iteration count is passed through
    /// [`OptimizationHints::assume_loop_count`] before the loop runs.
    #[inline(always)]
    pub fn assume_vectorizable<T, F>(slice: &[T], f: F)
    where
        F: FnMut(&T),
    {
        let iterations = OptimizationHints::assume_loop_count(slice.len());
        slice.iter().take(iterations).for_each(f);
    }

    /// Returns `true` when `size` is strictly greater than 1000, wrapped in
    /// a `likely` hint.
    #[inline(always)]
    pub fn assume_parallel_beneficial(size: usize) -> bool {
        const PARALLEL_THRESHOLD: usize = 1000;
        let worthwhile = size > PARALLEL_THRESHOLD;
        OptimizationHints::likely(worthwhile)
    }

    /// Four times [`OptimizationHints::optimal_simd_width`] for `T`.
    #[inline(always)]
    pub fn optimal_chunk_size<T>() -> usize {
        let lanes = OptimizationHints::optimal_simd_width::<T>();
        lanes * 4
    }
}
// Unit tests for the hint helpers. They use `vec!`, so they are compiled out
// when the `no-std` feature is enabled.
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
    use super::*;

    /// Pointer, length and width helpers hand their inputs back unchanged.
    #[test]
    fn test_optimization_hints() {
        // Bind the array to a local so the pointer does not come from a
        // temporary that is dropped at the end of the statement.
        let values = [1.0f32; 16];
        let ptr = values.as_ptr();
        let aligned_ptr = OptimizationHints::assume_aligned(ptr, 16);
        assert_eq!(ptr, aligned_ptr);

        let slice = &[1, 2, 3, 4];
        let len_slice = OptimizationHints::assume_len(slice, 4);
        assert_eq!(slice.len(), len_slice.len());

        let optimal_width = OptimizationHints::optimal_simd_width::<f32>();
        assert!(optimal_width > 0);
    }

    /// Slice-level wrappers keep length and contents intact.
    #[test]
    fn test_simd_hints() {
        let data = vec![1.0f32; 64];
        let aligned_slice = simd_hints::assume_simd_aligned(&data);
        assert_eq!(data.len(), aligned_slice.len());
        assert_eq!(data.as_ptr(), aligned_slice.as_ptr());

        let mut scratch = vec![0u8; 8];
        let aligned_mut = simd_hints::assume_simd_aligned_mut(&mut scratch);
        assert_eq!(aligned_mut.len(), 8);

        let chunk_size = simd_hints::optimal_chunk_size::<f32>();
        assert!(chunk_size > 0);

        let parallel = simd_hints::assume_parallel_beneficial(2000);
        assert!(parallel);
        assert!(!simd_hints::assume_parallel_beneficial(10));
    }

    /// Branch hints never change the value of the condition.
    #[test]
    fn test_branch_hints() {
        let likely_true = OptimizationHints::likely(true);
        let unlikely_false = OptimizationHints::unlikely(false);
        assert!(likely_true);
        assert!(!unlikely_false);

        // Cover the opposite outcome of each hint as well.
        assert!(!OptimizationHints::likely(false));
        assert!(OptimizationHints::unlikely(true));
    }

    /// Prefetch helpers are callable with a valid pointer; there is no
    /// observable result to assert on.
    #[test]
    fn test_prefetch_hints() {
        let data = vec![1.0f32; 100];
        OptimizationHints::prefetch_read(data.as_ptr());
        OptimizationHints::prefetch_write(data.as_ptr());
        OptimizationHints::prefetch_nta(data.as_ptr());
    }

    /// The macro forms expand to the corresponding helper calls.
    #[test]
    fn test_macro_hints() {
        let data = vec![1.0f32; 16];
        let ptr = optimize_for_simd!(assume_aligned(data.as_ptr(), 16));
        assert_eq!(ptr, data.as_ptr());
        optimize_for_simd!(prefetch_read(ptr));

        let slice = optimize_for_simd!(assume_len(data.as_slice(), 16));
        assert_eq!(slice.len(), 16);
    }
}