use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::f_estrin_polyeval5;
use crate::sincospi::{GenSinCosPiBackend, SinCosPiBackend, reduce_pi_64};
use crate::sincospi_tables::SINPI_K_PI_OVER_64;
/// Cold-path evaluation of sinc(pi*x) = sin(pi*x)/(pi*x) for small |x|,
/// used when the fast polynomial's error interval straddles a rounding
/// boundary (see the `lb == ub` test in the callers).
///
/// Evaluates a degree-6 polynomial in x^2 with double-double coefficients
/// via a Horner scheme expressed through the backend's fused operations.
#[cold]
fn as_sincpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
    // Double-double coefficients as (lo, hi) f64 bit patterns for the
    // expansion of sinc(pi*x) in powers of x^2. C[0].hi is 1.0 and
    // C[1].hi is -pi^2/6 (same bits as M_SQR_PI_OVER_6 below); higher
    // entries are the subsequent series coefficients.
    const C: [(u64, u64); 7] = [
        (0xb9d3080000000000, 0x3ff0000000000000),
        (0xbc81873d86314302, 0xbffa51a6625307d3),
        (0x3c84871b4ffeefae, 0x3fe9f9cb402bc46c),
        (0xbc5562d6ae037010, 0xbfc86a8e4720db66),
        (0xbc386c93f4549bac, 0x3f9ac6805cf31ffd),
        (0x3c0dbda368edfa40, 0xbf633816a3399d4e),
        (0xbbcf22ccc18f27a9, 0x3f23736e6a59edd9),
    ];
    // Exact double-double square of the argument.
    let x2 = backend.exact_mult(x, x);
    // Horner: p = (((((C6*x2 + C5)*x2 + C4)*x2 + C3)*x2 + C2)*x2 + C1)*x2 + C0.
    let mut p = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[6]),
        DoubleDouble::from_bit_pair(C[5]),
    );
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[4]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    // Collapse the double-double result to the nearest f64.
    p.to_f64()
}
/// Portable (non-FMA) implementation of the normalized cardinal sine
/// sinc(pi*x) = sin(pi*x)/(pi*x), with sinc(0) = 1.
///
/// Structure: classify the argument from its raw bits, then
/// 1) exact special cases (0, inf/NaN, huge integers),
/// 2) a small-|x| polynomial path with a rounding-certainty test,
/// 3) exact integer / half-integer detection,
/// 4) the general path via range reduction by pi/64 and table lookup.
#[inline(always)]
fn sincpi_gen_impl(x: f64) -> f64 {
    let ix = x.to_bits();
    // Absolute-value bits (sign cleared).
    let ax = ix & 0x7fff_ffff_ffff_ffff;
    if ax == 0 {
        // sinc(0) = 1 exactly.
        return 1.;
    }
    // Biased exponent field.
    let e: i32 = (ax >> 52) as i32;
    // Mantissa with the implicit leading bit restored.
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
    // sgn is all-ones for negative x, zero otherwise; m is the mantissa
    // two's-complement negated when x < 0.
    let sgn: i64 = (ix as i64) >> 63;
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        // Large |x| (>= 2^41) or non-finite.
        if e == 0x7ff {
            if (ix << 12) == 0 {
                // +/-infinity: sinc is undefined, return NaN.
                return f64::NAN;
            }
            // NaN input: propagate (x + x also quiets signaling NaNs).
            return x + x; }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: x is necessarily an integer, so sin(pi*x) = 0
            // and sinc(pi*x) is a signed zero.
            return f64::copysign(0.0, x);
        }
        // Shift so the fractional bits of x land in the low 11 bits of iq.
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // All fractional bits zero: x is an integer, sin(pi*x) = 0.
            return f64::copysign(0.0, x);
        }
    }
    if ax <= 0x3fa2000000000000u64 {
        // |x| <= ~0.0352: polynomial approximation around zero.
        if ax < 0x3c90000000000000u64 {
            // |x| < 2^-54: at most the first correction term matters.
            if ax <= 0x3b05798ee2308c3au64 {
                // |x| tiny enough that the result rounds to exactly 1.
                return 1.;
            }
            #[cfg(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            ))]
            {
                // Two-term series 1 - (pi^2/6)*x^2, fused for correct rounding.
                const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
                let p = f_fmla(x, M_SQR_PI_OVER_6 * x, 1.);
                return p;
            }
            #[cfg(not(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            )))]
            {
                use crate::common::min_normal_f64;
                // No hardware FMA: 1 - min_normal rounds to 1 under
                // round-to-nearest; presumably this form also marks the
                // result inexact — NOTE(review): confirm intent.
                return 1. - min_normal_f64();
            }
        }
        let x2 = x * x;
        // eps: absolute error bound for the polynomial below, scaled by x,
        // used in the rounding-certainty test.
        let eps = x * f_fmla(
            x2,
            f64::from_bits(0x3d00000000000000), f64::from_bits(0x3bd0000000000000), );
        // Plain-f64 polynomial coefficients in powers of x^2
        // (C[0] is -pi^2/6; cf. the double-double table in as_sincpi_zero).
        const C: [u64; 5] = [
            0xbffa51a6625307d3,
            0x3fe9f9cb402bbeaa,
            0xbfc86a8e466bbb5b,
            0x3f9ac66d887e2f38,
            0xbf628473a38d289a,
        ];
        // Constant term of the series, close to 1, as a double-double.
        const F: DoubleDouble =
            DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));
        let p = f_estrin_polyeval5(
            x2,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
            f64::from_bits(C[4]),
        );
        // result ~= F + p*x^2, accumulated in double-double.
        let v = DoubleDouble::from_exact_mult(p, x2);
        let z = DoubleDouble::add(F, v);
        // If nudging the result down and up by eps rounds identically,
        // the f64 result is certain; otherwise take the slow path.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        return as_sincpi_zero(x, &GenSinCosPiBackend {});
    }
    // Detect x with at most one fractional bit (x = n or x = n + 0.5).
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Non-zero integer x: sin(pi*x) = 0, signed zero result.
            return f64::copysign(0.0, x); }
        // x = n + 0.5 exactly: sin(pi*x) = +/-1, so sinc(pi*x) = +/-1/(pi*x);
        // t is the parity bit of n, which fixes the sign of sin.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        ))]
        {
            let num = if t == 0 {
                f64::copysign(1.0, x)
            } else {
                -f64::copysign(1.0, x)
            };
            // pi as a double-double (hi, lo bit pair).
            const PI: DoubleDouble =
                DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
            let r = DoubleDouble::quick_mult_f64(PI, x);
            // +/-1 / (pi*x) in double-double precision.
            let v = DoubleDouble::from_f64_div_dd(num, r);
            return v.to_f64();
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            target_arch = "aarch64"
        )))]
        {
            use crate::double_double::two_product_compatible;
            // Without FMA, the double-double product pi*x is only exact when
            // x is representable for the two-product split; otherwise fall
            // back to 128-bit dyadic arithmetic.
            return if two_product_compatible(x) {
                let num = if t == 0 {
                    f64::copysign(1.0, x)
                } else {
                    -f64::copysign(1.0, x)
                };
                const PI: DoubleDouble =
                    DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
                let r = DoubleDouble::quick_mult_f64(PI, x);
                let v = DoubleDouble::from_f64_div_dd(num, r);
                v.to_f64()
            } else {
                use crate::dyadic_float::{DyadicFloat128, DyadicSign};
                let num = DyadicFloat128::new_from_f64(if t == 0 {
                    f64::copysign(1.0, x)
                } else {
                    -f64::copysign(1.0, x)
                });
                // pi with a 128-bit mantissa.
                const PI: DyadicFloat128 = DyadicFloat128 {
                    sign: DyadicSign::Pos,
                    exponent: -126,
                    mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
                };
                let dx = DyadicFloat128::new_from_f64(x);
                // +/-1 * (pi*x)^-1, rounded once at the end.
                let r = (PI * dx).reciprocal();
                (num * r).fast_as_f64()
            };
        }
    }
    // General path: reduce so that pi*x = k*(pi/64) + pi*y with small |y|,
    // then sin(pi*x) = sin_k*cos(pi*y) + cos_k*sin(pi*y) via table lookup.
    let (y, k) = reduce_pi_64(x);
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    // cos(k*pi/64) = sin((k+32)*pi/64): same table, shifted index.
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let r_sincos = crate::sincospi::sincospi_eval(y, &GenSinCosPiBackend {});
    const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
    // scale = pi*x, the sinc denominator.
    let scale = DoubleDouble::quick_mult_f64(PI, x);
    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
    // Combine the two products: exact add of the high parts, then fold in
    // both low parts.
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    // sinc = sin(pi*x) / (pi*x).
    rr = DoubleDouble::div(rr, scale);
    // Rounding-certainty test against the evaluation error bound; fall back
    // to the double-double path when inconclusive.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    sincpi_dd(y, sin_k, cos_k, scale, &GenSinCosPiBackend {})
}
/// AVX/FMA specialization of sinc(pi*x) = sin(pi*x)/(pi*x).
///
/// Mirrors `sincpi_gen_impl` step for step (see that function for the full
/// algorithm commentary) but uses hardware `mul_add` and the `*_fma`
/// double-double primitives, and never needs the no-FMA fallbacks.
///
/// Safety: callers must ensure the `avx` and `fma` CPU features are present
/// (enforced by runtime detection in `f_sincpi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sincpi_fma_impl(x: f64) -> f64 {
    use crate::sincospi::FmaSinCosPiBackend;
    let ix = x.to_bits();
    // Absolute-value bits.
    let ax = ix & 0x7fff_ffff_ffff_ffff;
    if ax == 0 {
        // sinc(0) = 1 exactly.
        return 1.;
    }
    // Biased exponent, mantissa with implicit bit, sign-applied mantissa.
    let e: i32 = (ax >> 52) as i32;
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52);
    let sgn: i64 = (ix as i64) >> 63;
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn);
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        // Large |x| or non-finite.
        if e == 0x7ff {
            if (ix << 12) == 0 {
                // +/-infinity -> NaN.
                return f64::NAN;
            }
            // NaN input: propagate.
            return x + x; }
        s = -s - 1;
        if s > 10 {
            // |x| >= 2^52: x is an integer, sin(pi*x) = 0.
            return f64::copysign(0.0, x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // Fractional bits all zero: integer x, signed zero result.
            return f64::copysign(0.0, x);
        }
    }
    if ax <= 0x3fa2000000000000u64 {
        // Small |x|: polynomial approximation around zero.
        if ax < 0x3c90000000000000u64 {
            if ax <= 0x3b05798ee2308c3au64 {
                // Result rounds to exactly 1.
                return 1.;
            }
            // Two-term series 1 - (pi^2/6)*x^2 with hardware FMA.
            const M_SQR_PI_OVER_6: f64 = f64::from_bits(0xbffa51a6625307d3);
            return f64::mul_add(x, M_SQR_PI_OVER_6 * x, 1.);
        }
        let x2 = x * x;
        // Error bound for the rounding-certainty test below.
        let eps = x * f64::mul_add(
            x2,
            f64::from_bits(0x3d00000000000000), f64::from_bits(0x3bd0000000000000), );
        // Same coefficients as the generic path (C[0] = -pi^2/6).
        const C: [u64; 5] = [
            0xbffa51a6625307d3,
            0x3fe9f9cb402bbeaa,
            0xbfc86a8e466bbb5b,
            0x3f9ac66d887e2f38,
            0xbf628473a38d289a,
        ];
        const F: DoubleDouble =
            DoubleDouble::from_bit_pair((0xbb93f0a925810000, 0x3ff0000000000000));
        use crate::polyeval::d_estrin_polyeval5;
        let p = d_estrin_polyeval5(
            x2,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
            f64::from_bits(C[4]),
        );
        // result ~= F + p*x^2 in double-double, FMA-based exact product.
        let v = DoubleDouble::from_exact_mult_fma(p, x2);
        let z = DoubleDouble::add(F, v);
        // Accept only if both error-bound nudges round the same way.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        return as_sincpi_zero(x, &FmaSinCosPiBackend {});
    }
    // x = n (integer) or x = n + 0.5 (half-odd integer) exact cases.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Non-zero integer: sin(pi*x) = 0.
            return f64::copysign(0.0, x); }
        // Half-integer: sinc(pi*x) = +/-1/(pi*x); t is the parity of n.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        let num = if t == 0 {
            f64::copysign(1.0, x)
        } else {
            -f64::copysign(1.0, x)
        };
        const PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
        let r = DoubleDouble::quick_mult_f64_fma(PI, x);
        let v = DoubleDouble::from_f64_div_dd_fma(num, r);
        return v.to_f64();
    }
    // General path: reduce by pi/64, table lookup, angle addition, divide.
    use crate::sincospi::reduce_pi_64_fma;
    let (y, k) = reduce_pi_64_fma(x);
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    // cos(k*pi/64) = sin((k+32)*pi/64).
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let r_sincos = crate::sincospi::sincospi_eval(y, &FmaSinCosPiBackend {});
    const PI: DoubleDouble = DoubleDouble::from_bit_pair((0x3ca1a62633145c07, 0x400921fb54442d18));
    // scale = pi*x, the sinc denominator.
    let scale = DoubleDouble::quick_mult_f64_fma(PI, x);
    let sin_k_cos_y = DoubleDouble::quick_mult_fma(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult_fma(cos_k, r_sincos.v_sin);
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    // sinc = sin(pi*x) / (pi*x).
    rr = DoubleDouble::div_fma(rr, scale);
    // Rounding-certainty test; fall back to the double-double path if unsure.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    sincpi_dd(y, sin_k, cos_k, scale, &FmaSinCosPiBackend {})
}
/// Computes the normalized cardinal sine sinc(pi*x) = sin(pi*x)/(pi*x),
/// with sinc(0) = 1 and NaN for non-finite inputs.
///
/// On x86/x86_64 the implementation is chosen once, at first call: the
/// AVX+FMA specialization when the CPU supports it, otherwise the portable
/// generic path. All other architectures always use the generic path.
pub fn f_sincpi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        sincpi_gen_impl(x)
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Cached function pointer: resolved on first call, reused afterwards.
        static DISPATCH: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let chosen = DISPATCH.get_or_init(|| {
            let has_fma_path = std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma");
            if has_fma_path {
                sincpi_fma_impl as unsafe fn(f64) -> f64
            } else {
                // Safe wrapper so the generic path coerces to the same
                // function-pointer type as the unsafe FMA entry point.
                fn generic_entry(x: f64) -> f64 {
                    sincpi_gen_impl(x)
                }
                generic_entry as unsafe fn(f64) -> f64
            }
        });
        // SAFETY: the unsafe FMA variant is stored only after runtime
        // detection confirmed the `avx` and `fma` features it requires;
        // the alternative pointer wraps a safe function.
        unsafe { chosen(x) }
    }
}
/// High-precision fallback for the sinc(pi*x) reconstruction, entered only
/// when the fast double-double rounding test was inconclusive.
///
/// Re-evaluates sin(pi*y)/cos(pi*y) in extended precision and recombines
/// them with the tabulated `sin_k`/`cos_k` values before dividing by
/// `scale` = pi times the original argument.
///
/// * `x` — the reduced argument y from the pi/64 range reduction
/// * `sin_k`, `cos_k` — table entries for the reduction index k
/// * `scale` — pi * (original x), the sinc denominator
///
/// Fix: the original carried both `#[cold]` and `#[inline(always)]`.
/// Force-inlining a cold helper into every (hot) caller defeats the
/// out-of-line cold-path code layout that `#[cold]` requests; the sibling
/// cold helper `as_sincpi_zero` correctly carries `#[cold]` alone, so this
/// function now matches it.
#[cold]
fn sincpi_dd<B: SinCosPiBackend>(
    x: f64,
    sin_k: DoubleDouble,
    cos_k: DoubleDouble,
    scale: DoubleDouble,
    backend: &B,
) -> f64 {
    let r_sincos = crate::sincospi::sincospi_eval_dd(x, backend);
    // sin(pi*(k/64 + y)) = sin_k * cos(pi*y) + cos_k * sin(pi*y).
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
    let mut rr = backend.mul_add(sin_k, r_sincos.v_cos, cos_k_sin_y);
    // sinc = sin(pi*x) / (pi*x).
    rr = backend.div(rr, scale);
    rr.to_f64()
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sincpi_zero() {
        // Exact expected values: tiny inputs round to 1, mid-range inputs
        // have known correctly-rounded results, integers give signed zero.
        let cases = [
            (2.2204460492503131e-24, 1.0),
            (f64::EPSILON, 1.0),
            (0.007080019335262543, 0.9999175469662566),
            (0.05468860710998057, 0.9950875152844803),
            (0.5231231231, 0.6068750737806441),
            (1., 0.),
            (-1., 0.),
            (-2., 0.),
            (-3., 0.),
        ];
        for (input, expected) in cases {
            assert_eq!(f_sincpi(input), expected);
        }
        // Non-finite inputs must produce NaN.
        for bad in [f64::INFINITY, f64::NEG_INFINITY, f64::NAN] {
            assert!(f_sincpi(bad).is_nan());
        }
    }
}