/// Byte alignment used for buffer allocations (64 — presumably chosen to
/// match cache-line / AVX-512 register width; confirm against allocator use).
pub const BYTE_ALIGNMENT: u32 = 64;
/// log2 of [`BYTE_ALIGNMENT`] (6 for an alignment of 64), computed so the
/// two constants can never drift apart.
pub const LOG_BYTE_ALIGNMENT: u32 = BYTE_ALIGNMENT.trailing_zeros();
/// Byte alignment applied to individual objects (8 bytes).
pub const OBJECT_ALIGNMENT: u32 = 8;
/// x86 SIMD capability tiers, ordered from least to most capable.
///
/// The explicit `i32` discriminants increase with capability, so the derived
/// `PartialOrd`/`Ord` (and raw `as i32` values) can be compared directly to
/// select the best available code path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(i32)]
pub enum CpuExtLevel {
/// No SIMD extensions assumed; portable scalar code.
Generic = 0,
Mmx = 1,
Sse = 2,
Sse2 = 3,
Sse3 = 4,
Ssse3 = 5,
Sse41 = 6,
Sse42 = 7,
Avx = 8,
Avx2 = 9,
/// AVX2 together with FMA support.
Avx2Fma = 10,
Avx512 = 11,
}
/// ARM SIMD capability tiers, ordered from least to most capable.
///
/// As with `CpuExtLevel`, the `i32` discriminants increase with capability so
/// ordered comparison picks the better tier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(i32)]
pub enum ArmCpuExtLevel {
/// No SIMD extensions assumed.
Generic = 0,
Neon = 1,
Sve = 2,
Sve2 = 3,
}
/// Detects the best supported x86-64 SIMD tier at runtime.
///
/// Probes CPU features from most to least capable and returns the first
/// matching [`CpuExtLevel`] as an `i32`. The AVX-512 tier is only reported
/// when the F/BW/CD/DQ/VL subset is available together.
///
/// NOTE(review): the `Mmx` tier is never produced by this cascade; on
/// x86-64, SSE2 is part of the baseline ISA anyway, so detection should
/// bottom out at `Sse2` on real hardware.
#[cfg(target_arch = "x86_64")]
pub fn get_cpu_ext_level() -> i32 {
    // AVX-512: require the common server subset, not just AVX-512F.
    if is_x86_feature_detected!("avx512f")
        && is_x86_feature_detected!("avx512bw")
        && is_x86_feature_detected!("avx512cd")
        && is_x86_feature_detected!("avx512dq")
        && is_x86_feature_detected!("avx512vl")
    {
        return CpuExtLevel::Avx512 as i32;
    }
    // Probe AVX2 once (the original probed it twice); FMA merely upgrades
    // the tier when AVX2 is already present.
    if is_x86_feature_detected!("avx2") {
        return if is_x86_feature_detected!("fma") {
            CpuExtLevel::Avx2Fma as i32
        } else {
            CpuExtLevel::Avx2 as i32
        };
    }
    if is_x86_feature_detected!("avx") {
        return CpuExtLevel::Avx as i32;
    }
    if is_x86_feature_detected!("sse4.2") {
        return CpuExtLevel::Sse42 as i32;
    }
    if is_x86_feature_detected!("sse4.1") {
        return CpuExtLevel::Sse41 as i32;
    }
    if is_x86_feature_detected!("ssse3") {
        return CpuExtLevel::Ssse3 as i32;
    }
    if is_x86_feature_detected!("sse3") {
        return CpuExtLevel::Sse3 as i32;
    }
    if is_x86_feature_detected!("sse2") {
        return CpuExtLevel::Sse2 as i32;
    }
    if is_x86_feature_detected!("sse") {
        return CpuExtLevel::Sse as i32;
    }
    CpuExtLevel::Generic as i32
}
/// AArch64 variant: always reports the NEON tier.
///
/// NOTE(review): no runtime probing is done — NEON is treated as the
/// baseline (it is architecturally mandatory on AArch64), and SVE/SVE2
/// detection is not implemented, so `ArmCpuExtLevel::Sve`/`Sve2` are never
/// returned. Confirm whether SVE detection is wanted here.
#[cfg(target_arch = "aarch64")]
pub fn get_cpu_ext_level() -> i32 {
ArmCpuExtLevel::Neon as i32
}
/// Fallback for architectures without a dedicated detection path: reports
/// level 0 (the generic, no-SIMD tier shared by both level enums).
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
pub fn get_cpu_ext_level() -> i32 {
0
}
/// Returns the number of set bits (popcount) in `val`.
#[inline]
pub const fn population_count(val: u32) -> u32 {
    u32::count_ones(val)
}
/// Returns the number of leading zero bits in `val` (32 when `val == 0`).
#[inline]
pub const fn count_leading_zeros(val: u32) -> u32 {
    u32::leading_zeros(val)
}
/// Returns the number of leading zero bits in `val` (64 when `val == 0`).
#[inline]
pub const fn count_leading_zeros_u64(val: u64) -> u32 {
    u64::leading_zeros(val)
}
/// Returns the number of trailing zero bits in `val` (32 when `val == 0`).
#[inline]
pub const fn count_trailing_zeros(val: u32) -> u32 {
    u32::trailing_zeros(val)
}
/// Rounds `val` to the nearest integer, halfway cases away from zero, then
/// casts to `i32` (the `f32 -> i32` `as` cast saturates and maps NaN to 0).
///
/// The half-unit bias carries `val`'s sign, reproducing the original
/// branch on `val >= 0.0` in a single expression.
#[inline]
pub fn ojph_round(val: f32) -> i32 {
    let bias = 0.5_f32.copysign(val);
    (val + bias) as i32
}
/// Truncates `val` toward zero and converts to `i32`.
///
/// Equivalent to a bare `as` cast: out-of-range values saturate and NaN
/// becomes 0; `trunc()` makes the rounding direction explicit.
#[inline]
pub fn ojph_trunc(val: f32) -> i32 {
    val.trunc() as i32
}
/// Returns the size of `count` elements of `T`, in bytes, rounded up to the
/// next multiple of `alignment`.
///
/// # Panics
/// Panics if `alignment` is zero or not a power of two — the bitmask trick
/// below would otherwise silently return a wrong size (and `align - 1`
/// would underflow for zero). The `count * size_of::<T>()` multiply can
/// still overflow for absurd counts (panics in const/debug builds, wraps in
/// release); callers pass allocation-sized counts where that cannot occur.
#[inline]
pub const fn calc_aligned_size<T>(count: usize, alignment: u32) -> usize {
    assert!(alignment.is_power_of_two(), "alignment must be a nonzero power of two");
    let byte_size = count * std::mem::size_of::<T>();
    let align = alignment as usize;
    // Round up: add (align - 1), then clear the low bits with the mask.
    (byte_size + align - 1) & !(align - 1)
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn popcount() {
        // 0b1010_1010 has exactly four set bits.
        assert_eq!(population_count(0b1010_1010), 4);
    }

    #[test]
    fn leading_zeros() {
        assert_eq!(count_leading_zeros(1), 31);
        assert_eq!(count_leading_zeros_u64(1), 63);
    }

    #[test]
    fn trailing_zeros() {
        assert_eq!(count_trailing_zeros(8), 3);
    }

    #[test]
    fn round_trunc() {
        // Rounding is half-away-from-zero; truncation drops the fraction.
        for (input, want) in [(2.3_f32, 2), (2.7, 3), (-2.3, -2)] {
            assert_eq!(ojph_round(input), want);
        }
        for (input, want) in [(2.9_f32, 2), (-2.9, -2)] {
            assert_eq!(ojph_trunc(input), want);
        }
    }

    #[test]
    fn aligned_size() {
        for (count, want) in [(10_usize, 64_usize), (20, 128)] {
            assert_eq!(calc_aligned_size::<i32>(count, 64), want);
        }
    }

    #[test]
    fn cpu_detection_runs() {
        // Smoke test: detection must return a non-negative tier on any arch.
        assert!(get_cpu_ext_level() >= 0);
    }
}