use core::arch::x86_64::{_mm_cvttsd_si64, _mm_cvttss_si64, _mm_loadu_pd, _mm_loadu_ps};
use crate::{power_of_two_f32, power_of_two_f64};
/// Converts an `f32` to an `i64` with the `_mm_cvttss_si64` intrinsic (truncating conversion).
///
/// The input is stored as the first element of a four-element array (the rest are zero),
/// loaded into a 128-bit register with an unaligned load, and the lowest lane is converted.
///
/// NOTE(review): for NaN or values outside the `i64` range the result is whatever the
/// instruction produces — confirm against the intrinsic's documentation before relying on it.
#[inline(always)]
fn f32_to_i64(float: f32) -> i64 {
    let lanes: [f32; 4] = [float, 0., 0., 0.];
    // SAFETY: `lanes` is a live local array of four `f32`s, so its pointer is valid for the
    // 16-byte read done by `_mm_loadu_ps`, which does not require alignment.
    // NOTE(review): both intrinsics need SSE; presumably guaranteed by the build target.
    unsafe { _mm_cvttss_si64(_mm_loadu_ps(lanes.as_ptr())) }
}
/// Branching variant of the `f32` to `u64` conversion.
///
/// Inputs that compare less than or equal to `power_of_two_f32(63)` (presumably 2^63, matching
/// the integer constant below — confirm) are converted directly through the signed helper.
/// Every other input has that limit subtracted first, is converted, and then has 2^63 added
/// back to the integer result with wrapping arithmetic.
#[inline(always)]
fn _f32_to_u64_branchful(float: f32) -> u64 {
    const LIMIT: f32 = power_of_two_f32(63);
    const HIGH_BIT: u64 = 1 << 63;

    if float <= LIMIT {
        return f32_to_i64(float) as u64;
    }

    // Reached for inputs above the limit, and also for NaN because the comparison above is
    // false for it.
    let rebased = f32_to_i64(float - LIMIT) as u64;
    rebased.wrapping_add(HIGH_BIT)
}
/// Branch-free variant of the `f32` to `u64` conversion.
///
/// Two signed conversions are performed: one of `float` itself and one of `float` with
/// `power_of_two_f32(63)` subtracted. The second result is OR-ed into the first only when the
/// first has its sign bit set; the selection uses a bit mask instead of a conditional.
///
/// NOTE(review): this presumably relies on the signed helper returning a value with only the
/// top bit set for inputs too large for `i64` — confirm against the intrinsic's documentation.
#[inline(always)]
fn f32_to_u64_branchless(float: f32) -> u64 {
    const OFFSET: f32 = power_of_two_f32(63);

    let direct = f32_to_i64(float);
    let rebased = f32_to_i64(float - OFFSET);

    // Arithmetic shift of a signed value: all ones when `direct` is negative, zero otherwise.
    let select_rebased = direct >> 63;
    let merged = direct | (rebased & select_rebased);
    merged as u64
}
/// Converts an `f32` to a `u64`.
///
/// Delegates to `f32_to_u64_branchless`; the branching variant `_f32_to_u64_branchful` is not
/// used here.
#[inline(always)]
fn f32_to_u64(float: f32) -> u64 {
    f32_to_u64_branchless(float)
}
/// Converts an `f64` to an `i64` with the `_mm_cvttsd_si64` intrinsic (truncating conversion).
///
/// The input is stored as the first element of a two-element array (the other is zero),
/// loaded into a 128-bit register with an unaligned load, and the lowest lane is converted.
///
/// NOTE(review): for NaN or values outside the `i64` range the result is whatever the
/// instruction produces — confirm against the intrinsic's documentation before relying on it.
#[inline(always)]
fn f64_to_i64(float: f64) -> i64 {
    let lanes: [f64; 2] = [float, 0.];
    // SAFETY: `lanes` is a live local array of two `f64`s, so its pointer is valid for the
    // 16-byte read done by `_mm_loadu_pd`, which does not require alignment.
    // NOTE(review): both intrinsics need SSE2; presumably guaranteed by the build target.
    unsafe { _mm_cvttsd_si64(_mm_loadu_pd(lanes.as_ptr())) }
}
/// Converts an `f64` to a `u64` without branching.
///
/// Two signed conversions are performed: one of `float` itself and one of `float` with
/// `power_of_two_f64(63)` subtracted. The second result is OR-ed into the first only when the
/// first has its sign bit set; the selection uses a bit mask instead of a conditional.
///
/// NOTE(review): this presumably relies on the signed helper returning a value with only the
/// top bit set for inputs too large for `i64` — confirm against the intrinsic's documentation.
#[inline(always)]
fn f64_to_u64(float: f64) -> u64 {
    const OFFSET: f64 = power_of_two_f64(63);

    let direct = f64_to_i64(float);
    let rebased = f64_to_i64(float - OFFSET);

    // Arithmetic shift of a signed value: all ones when `direct` is negative, zero otherwise.
    let select_rebased = direct >> 63;
    let merged = direct | (rebased & select_rebased);
    merged as u64
}
/// Public float-to-integer conversions built on the 64-bit helpers of the parent module.
///
/// Targets narrower than 64 bits go through the parent's signed 64-bit conversion and are then
/// narrowed with an `as` cast, which keeps only the low bits of the intermediate value.
/// The 64-bit targets forward to the parent's helpers unchanged, and the 128-bit targets use
/// Rust's built-in `as` float-to-integer cast.
pub mod implementation {
    /// Converts an `f32` to `i8` via the parent's `f32_to_i64`, keeping the low 8 bits.
    #[inline(always)]
    pub fn f32_to_i8(float: f32) -> i8 {
        super::f32_to_i64(float) as i8
    }

    /// Converts an `f32` to `u8` via the parent's `f32_to_i64`, keeping the low 8 bits.
    #[inline(always)]
    pub fn f32_to_u8(float: f32) -> u8 {
        super::f32_to_i64(float) as u8
    }

    /// Converts an `f32` to `i16` via the parent's `f32_to_i64`, keeping the low 16 bits.
    #[inline(always)]
    pub fn f32_to_i16(float: f32) -> i16 {
        super::f32_to_i64(float) as i16
    }

    /// Converts an `f32` to `u16` via the parent's `f32_to_i64`, keeping the low 16 bits.
    #[inline(always)]
    pub fn f32_to_u16(float: f32) -> u16 {
        super::f32_to_i64(float) as u16
    }

    /// Converts an `f32` to `i32` via the parent's `f32_to_i64`, keeping the low 32 bits.
    #[inline(always)]
    pub fn f32_to_i32(float: f32) -> i32 {
        super::f32_to_i64(float) as i32
    }

    /// Converts an `f32` to `u32` via the parent's `f32_to_i64`, keeping the low 32 bits.
    #[inline(always)]
    pub fn f32_to_u32(float: f32) -> u32 {
        super::f32_to_i64(float) as u32
    }

    /// Converts an `f32` to `i64` by forwarding to the parent's `f32_to_i64`.
    #[inline(always)]
    pub fn f32_to_i64(float: f32) -> i64 {
        super::f32_to_i64(float)
    }

    /// Converts an `f32` to `u64` by forwarding to the parent's `f32_to_u64`.
    #[inline(always)]
    pub fn f32_to_u64(float: f32) -> u64 {
        super::f32_to_u64(float)
    }

    /// Converts an `f32` to `i128` with Rust's built-in `as` cast.
    #[inline(always)]
    pub fn f32_to_i128(float: f32) -> i128 {
        float as i128
    }

    /// Converts an `f32` to `u128` with Rust's built-in `as` cast.
    #[inline(always)]
    pub fn f32_to_u128(float: f32) -> u128 {
        float as u128
    }

    /// Converts an `f64` to `i8` via the parent's `f64_to_i64`, keeping the low 8 bits.
    #[inline(always)]
    pub fn f64_to_i8(float: f64) -> i8 {
        // Call the parent's helper directly, like every sibling, instead of going through
        // this module's own `f64_to_i64` wrapper.
        super::f64_to_i64(float) as i8
    }

    /// Converts an `f64` to `u8` via the parent's `f64_to_i64`, keeping the low 8 bits.
    #[inline(always)]
    pub fn f64_to_u8(float: f64) -> u8 {
        super::f64_to_i64(float) as u8
    }

    /// Converts an `f64` to `i16` via the parent's `f64_to_i64`, keeping the low 16 bits.
    #[inline(always)]
    pub fn f64_to_i16(float: f64) -> i16 {
        super::f64_to_i64(float) as i16
    }

    /// Converts an `f64` to `u16` via the parent's `f64_to_i64`, keeping the low 16 bits.
    #[inline(always)]
    pub fn f64_to_u16(float: f64) -> u16 {
        super::f64_to_i64(float) as u16
    }

    /// Converts an `f64` to `i32` via the parent's `f64_to_i64`, keeping the low 32 bits.
    #[inline(always)]
    pub fn f64_to_i32(float: f64) -> i32 {
        super::f64_to_i64(float) as i32
    }

    /// Converts an `f64` to `u32` via the parent's `f64_to_i64`, keeping the low 32 bits.
    #[inline(always)]
    pub fn f64_to_u32(float: f64) -> u32 {
        super::f64_to_i64(float) as u32
    }

    /// Converts an `f64` to `i64` by forwarding to the parent's `f64_to_i64`.
    #[inline(always)]
    pub fn f64_to_i64(float: f64) -> i64 {
        super::f64_to_i64(float)
    }

    /// Converts an `f64` to `u64` by forwarding to the parent's `f64_to_u64`.
    #[inline(always)]
    pub fn f64_to_u64(float: f64) -> u64 {
        super::f64_to_u64(float)
    }

    /// Converts an `f64` to `i128` with Rust's built-in `as` cast.
    #[inline(always)]
    pub fn f64_to_i128(float: f64) -> i128 {
        float as i128
    }

    /// Converts an `f64` to `u128` with Rust's built-in `as` cast.
    #[inline(always)]
    pub fn f64_to_u128(float: f64) -> u128 {
        float as u128
    }
}