use crate::common::{dd_fmla, dyad_fmla, f_fmla, is_odd_integer};
use crate::double_double::DoubleDouble;
use crate::polyeval::{f_polyeval3, f_polyeval4};
use crate::rounding::CpuRound;
use crate::sin::SinCos;
use crate::sincospi_tables::SINPI_K_PI_OVER_64;
/// Slow ("Ziv fallback") path for cos(pi*x) when |x| is tiny and the fast
/// polynomial could not decide the correctly-rounded result.
///
/// Evaluates 1 + C[0]*x^2 + C[1]*x^4 + C[2]*x^6 + C[3]*x^8 + C[4]*x^10 in
/// double-double arithmetic via Horner's scheme over x^2 and collapses the
/// result to f64.
///
/// NOTE(review): `#[cold]` combined with `#[inline(always)]` is contradictory
/// in general; presumably `inline(always)` is kept so the body inherits the
/// caller's `#[target_feature]` — confirm before changing either attribute.
#[cold]
#[inline(always)]
fn as_cospi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
    // Double-double coefficients as (lo, hi) bit pairs; C[i] multiplies
    // x^(2*(i+1)) in the expansion of cos(pi*x) - 1.
    const C: [(u64, u64); 5] = [
        (0xbcb692b71366cc04, 0xc013bd3cc9be45de),
        (0xbcb32b33fb803bd5, 0x40103c1f081b5ac4),
        (0xbc9f5b752e98b088, 0xbff55d3c7e3cbff9),
        (0x3c30023d540b9350, 0x3fce1f506446cb66),
        (0x3c1a5d47937787d2, 0xbf8a9b062a36ba1c),
    ];
    let x2 = backend.exact_mult(x, x);
    // Horner over x^2, highest coefficient first.
    // BUGFIX: the first step previously computed x2*C[3] + C[3], leaving C[4]
    // dead and giving the wrong x^10 coefficient. The correct top step is
    // x2*C[4] + C[3], mirroring as_sinpi_zero's x2*C[5] + C[4]. The numeric
    // effect is below double-double precision at this path's tiny |x|, but
    // the table entry was unused.
    let mut p = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[4]),
        DoubleDouble::from_bit_pair(C[3]),
    );
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    // cos(pi*x) = 1 + x^2 * P(x^2).
    p = backend.mul_add_f64(x2, p, 1.);
    p.to_f64()
}
/// Slow ("Ziv fallback") path for sin(pi*x) when |x| is small and the fast
/// polynomial could not decide the correctly-rounded result.
///
/// Evaluates x * (C[0] + C[1]*x^2 + ... + C[5]*x^10) in double-double
/// arithmetic via Horner's scheme over x^2; C[0] is pi in double-double.
///
/// NOTE(review): `#[cold]` + `#[inline(always)]` — see the matching note on
/// `as_cospi_zero`.
#[cold]
#[inline(always)]
fn as_sinpi_zero<B: SinCosPiBackend>(x: f64, backend: &B) -> f64 {
    // Double-double coefficients as (lo, hi) bit pairs; C[0].hi is
    // 0x400921fb54442d18 (pi), so C[i] multiplies x^(2i+1) after the final
    // multiply by x.
    const C: [(u64, u64); 6] = [
        (0x3ca1a626311d9056, 0x400921fb54442d18),
        (0x3cb055f12c462211, 0xc014abbce625be53),
        (0xbc9789ea63534250, 0x400466bc6775aae1),
        (0xbc78b86de6962184, 0xbfe32d2cce62874e),
        (0x3c4eddf7cd887302, 0x3fb507833e2b781f),
        (0x3bf180c9d4af2894, 0xbf7e2ea4e143707e),
    ];
    let x2 = backend.exact_mult(x, x);
    // Horner over x^2, highest coefficient first.
    let mut p = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(C[5]),
        DoubleDouble::from_bit_pair(C[4]),
    );
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[3]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[2]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[1]));
    p = backend.quick_mul_add(x2, p, DoubleDouble::from_bit_pair(C[0]));
    // Odd function: multiply the even polynomial by x.
    p = backend.quick_mult_f64(p, x);
    p.to_f64()
}
/// Argument reduction for the pi-scaled trig functions: writes x = k/64 + y
/// with k = round(64*x), so |y| <= 1/128, and returns (y, k).
///
/// Uses `CpuRound` plus a `dd_fmla` so the subtraction k*(-1/64) + x is exact.
#[inline]
pub(crate) fn reduce_pi_64(x: f64) -> (f64, i64) {
    let kd = (x * 64.).cpu_round();
    let y = dd_fmla(kd, -1. / 64., x);
    (y, unsafe {
        // SAFETY: `to_int_unchecked` is UB if kd is NaN or out of i64 range.
        // Callers screen the exponent first (the s/e checks in the *_gen_impl
        // functions return early for |x| >= ~2^52 and for NaN/Inf), so
        // |kd| = |round(64*x)| stays well inside i64 — TODO confirm every
        // call site preserves this invariant.
        kd.to_int_unchecked::<i64>()
    })
}
/// FMA-backend variant of [`reduce_pi_64`]: same contract (x = k/64 + y,
/// |y| <= 1/128), but uses the native `round` and hardware `mul_add` instead
/// of the portable `cpu_round`/`dd_fmla` pair.
#[inline(always)]
#[allow(unused)]
pub(crate) fn reduce_pi_64_fma(x: f64) -> (f64, i64) {
    let kd = (x * 64.).round();
    let y = f64::mul_add(kd, -1. / 64., x);
    (y, unsafe {
        // SAFETY: same precondition as reduce_pi_64 — callers bound |x| so
        // round(64*x) fits in i64; see the note there.
        kd.to_int_unchecked::<i64>()
    })
}
/// Abstraction over the arithmetic primitives used by the sinpi/cospi kernels,
/// so the same generic code can run either with portable software routines
/// (`GenSinCosPiBackend`) or with hardware FMA (`FmaSinCosPiBackend`).
///
/// Both implementations must compute identical results for the kernels'
/// accuracy analysis to hold; only speed differs — TODO confirm against the
/// DoubleDouble `_fma` variants' documentation.
pub(crate) trait SinCosPiBackend {
    /// Fused multiply-add: x*y + z with a single rounding.
    fn fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// FMA variant used where the generic backend calls `dd_fmla`.
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// FMA variant used where the generic backend calls `dyad_fmla`.
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64;
    /// Degree-2 polynomial a0 + a1*x + a2*x^2 (Horner).
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64;
    /// Splits x into (y, k) with x = k/64 + y, |y| <= 1/128.
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64);
    /// Double-double times f64.
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble;
    /// Double-double product.
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
    /// True when x is an odd integer (used for cos sign at huge arguments).
    fn odd_integer(&self, x: f64) -> bool;
    /// Double-double division.
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble;
    /// a*b + c with an f64 addend.
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble;
    /// Fast (less exact) double-double a*b + c.
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
    /// Full-accuracy double-double a*b + c.
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble;
    /// Exact product of two f64 as a double-double (hi + lo == x*y exactly).
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble;
}
/// Portable backend: every primitive delegates to the crate's software
/// implementations, so it works on targets without hardware FMA.
pub(crate) struct GenSinCosPiBackend {}

impl SinCosPiBackend for GenSinCosPiBackend {
    #[inline(always)]
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f_fmla(x, y, z)
    }
    #[inline(always)]
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        dd_fmla(x, y, z)
    }
    #[inline(always)]
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        dyad_fmla(x, y, z)
    }
    #[inline(always)]
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
        // Local import shadows the file-level one; harmless but redundant.
        use crate::polyeval::f_polyeval3;
        f_polyeval3(x, a0, a1, a2)
    }
    #[inline(always)]
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
        reduce_pi_64(x)
    }
    #[inline(always)]
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
        DoubleDouble::quick_mult_f64(x, y)
    }
    #[inline(always)]
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mult(x, y)
    }
    #[inline(always)]
    fn odd_integer(&self, x: f64) -> bool {
        is_odd_integer(x)
    }
    #[inline(always)]
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::div(x, y)
    }
    #[inline(always)]
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
        DoubleDouble::mul_add_f64(a, b, c)
    }
    #[inline(always)]
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mul_add(a, b, c)
    }
    #[inline(always)]
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::mul_add(a, b, c)
    }
    #[inline(always)]
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
        DoubleDouble::from_exact_mult(x, y)
    }
}
/// x86/x86_64 backend: primitives map to hardware FMA (`f64::mul_add`) and
/// the DoubleDouble `_fma` variants. Only safe to select when the CPU
/// actually supports FMA — see the runtime dispatch in `f_sinpi`/`f_cospi`/
/// `f_sincospi` and the `#[target_feature]` wrappers.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub(crate) struct FmaSinCosPiBackend {}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
impl SinCosPiBackend for FmaSinCosPiBackend {
    #[inline(always)]
    fn fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn dd_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        // With hardware FMA all three fma flavors collapse to mul_add.
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn dyad_fma(&self, x: f64, y: f64, z: f64) -> f64 {
        f64::mul_add(x, y, z)
    }
    #[inline(always)]
    fn polyeval3(&self, x: f64, a0: f64, a1: f64, a2: f64) -> f64 {
        use crate::polyeval::d_polyeval3;
        d_polyeval3(x, a0, a1, a2)
    }
    #[inline(always)]
    fn arg_reduce_pi_64(&self, x: f64) -> (f64, i64) {
        reduce_pi_64_fma(x)
    }
    #[inline(always)]
    fn quick_mult_f64(&self, x: DoubleDouble, y: f64) -> DoubleDouble {
        DoubleDouble::quick_mult_f64_fma(x, y)
    }
    #[inline(always)]
    fn quick_mult(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mult_fma(x, y)
    }
    #[inline(always)]
    fn odd_integer(&self, x: f64) -> bool {
        is_odd_integer(x)
    }
    #[inline(always)]
    fn div(&self, x: DoubleDouble, y: DoubleDouble) -> DoubleDouble {
        DoubleDouble::div_fma(x, y)
    }
    #[inline(always)]
    fn mul_add_f64(&self, a: DoubleDouble, b: DoubleDouble, c: f64) -> DoubleDouble {
        DoubleDouble::mul_add_f64_fma(a, b, c)
    }
    #[inline(always)]
    fn quick_mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::quick_mul_add_fma(a, b, c)
    }
    #[inline(always)]
    fn mul_add(&self, a: DoubleDouble, b: DoubleDouble, c: DoubleDouble) -> DoubleDouble {
        DoubleDouble::mul_add_fma(a, b, c)
    }
    #[inline(always)]
    fn exact_mult(&self, x: f64, y: f64) -> DoubleDouble {
        DoubleDouble::from_exact_mult_fma(x, y)
    }
}
/// Fast-path evaluation of sin(pi*x) and cos(pi*x) for a reduced argument
/// (|x| <= 1/128 coming from `arg_reduce_pi_64`). Returns both values as
/// double-doubles plus `err`, an error bound the callers use in a Ziv-style
/// rounding test: if hi + (lo +- err) agree, the fast result is accepted.
#[inline(always)]
pub(crate) fn sincospi_eval<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
    let x2 = x * x;
    // Low-order correction polynomial for sin: odd terms beyond pi*x.
    let sin_lop = backend.polyeval3(
        x2,
        f64::from_bits(0xc014abbce625be4d),
        f64::from_bits(0x400466bc6767f259),
        f64::from_bits(0xbfe32d176b0b3baf),
    ) * x2;
    // sin_lo also folds in the low word of pi (0x3ca1a5c04563817a) times x.
    let sin_lo = backend.dd_fma(f64::from_bits(0x3ca1a5c04563817a), x, sin_lop * x);
    // Leading term: x * pi (hi word of pi is 0x400921fb54442d18).
    let sin_hi = x * f64::from_bits(0x400921fb54442d18);
    // Correction polynomial for cos beyond the leading 1.
    let p = backend.polyeval3(
        x2,
        f64::from_bits(0xc013bd3cc9be45cf),
        f64::from_bits(0x40103c1f08085ad1),
        f64::from_bits(0xbff55d1e43463fc3),
    );
    let cos_lo = backend.dd_fma(p, x2, f64::from_bits(0xbbdf72adefec0800));
    let cos_hi = f64::from_bits(0x3ff0000000000000); // exactly 1.0
    // Error bound grows with x^2; constants presumably from the error
    // analysis of this polynomial pair — TODO confirm source of the bound.
    let err = backend.fma(
        x2,
        f64::from_bits(0x3cb0000000000000),
        f64::from_bits(0x3c40000000000000),
    );
    SinCos {
        v_sin: DoubleDouble::from_exact_add(sin_hi, sin_lo),
        v_cos: DoubleDouble::from_exact_add(cos_hi, cos_lo),
        err,
    }
}
/// Full double-double evaluation of sin(pi*x) and cos(pi*x) for a reduced
/// argument; used by the slow paths (`sinpi_dd`, `sincospi_dd`) when the
/// fast `sincospi_eval` could not decide rounding. Returns err = 0 since the
/// callers accept this result unconditionally.
#[inline(always)]
pub(crate) fn sincospi_eval_dd<B: SinCosPiBackend>(x: f64, backend: &B) -> SinCos {
    let x2 = backend.exact_mult(x, x);
    // sin coefficients as (lo, hi) bit pairs; SC[0].hi is pi
    // (0x400921fb54442d18), SC[i] multiplies x^(2i+1) after the final
    // multiply by x.
    const SC: [(u64, u64); 5] = [
        (0x3ca1a626330ccf19, 0x400921fb54442d18),
        (0x3cb05540f6323de9, 0xc014abbce625be53),
        (0xbc9050fdd1229756, 0x400466bc6775aadf),
        (0xbc780d406f3472e8, 0xbfe32d2cce5a7bf1),
        (0x3c4cfcf8b6b817f2, 0x3fb5077069d8a182),
    ];
    // Horner over x^2, then multiply the even polynomial by x (odd function).
    let mut sin_y = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(SC[4]),
        DoubleDouble::from_bit_pair(SC[3]),
    );
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[2]));
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[1]));
    sin_y = backend.quick_mul_add(x2, sin_y, DoubleDouble::from_bit_pair(SC[0]));
    sin_y = backend.quick_mult_f64(sin_y, x);
    // cos coefficients; CC[0].hi is exactly 1.0, CC[i] multiplies x^(2i).
    const CC: [(u64, u64); 5] = [
        (0xbaaa70a580000000, 0x3ff0000000000000),
        (0xbcb69211d8dd1237, 0xc013bd3cc9be45de),
        (0xbcbd96cfd637eeb7, 0x40103c1f081b5abf),
        (0x3c994d75c577f029, 0xbff55d3c7e2e4ba5),
        (0xbc5c542d998a4e48, 0x3fce1f2f5f747411),
    ];
    let mut cos_y = backend.quick_mul_add(
        x2,
        DoubleDouble::from_bit_pair(CC[4]),
        DoubleDouble::from_bit_pair(CC[3]),
    );
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[2]));
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[1]));
    cos_y = backend.quick_mul_add(x2, cos_y, DoubleDouble::from_bit_pair(CC[0]));
    SinCos {
        v_sin: sin_y,
        v_cos: cos_y,
        err: 0.,
    }
}
/// Slow-path recombination: computes sin_k*cos(pi*x) + cos_k*sin(pi*x) in
/// full double-double precision and rounds once to f64. With `sin_k` and
/// `cos_k` taken from the k*pi/64 table this is the angle-addition formula
/// for sin(pi*(k/64 + x)).
#[cold]
#[inline(always)]
fn sinpi_dd<B: SinCosPiBackend>(
    x: f64,
    sin_k: DoubleDouble,
    cos_k: DoubleDouble,
    backend: &B,
) -> f64 {
    let sc = sincospi_eval_dd(x, backend);
    // cos_k * sin(y) first, then fold in sin_k * cos(y) with a full mul_add.
    let partial = backend.quick_mult(cos_k, sc.v_sin);
    backend.mul_add(sin_k, sc.v_cos, partial).to_f64()
}
/// Slow-path recombination producing both sin(pi*·) and cos(pi*·) at once
/// via the angle-addition formulas. The four table operands are passed in by
/// the caller so this same routine serves the combined sincos entry point.
#[cold]
#[inline(always)]
fn sincospi_dd<B: SinCosPiBackend>(
    x: f64,
    sin_sin_k: DoubleDouble,
    sin_cos_k: DoubleDouble,
    cos_sin_k: DoubleDouble,
    cos_cos_k: DoubleDouble,
    backend: &B,
) -> (f64, f64) {
    let sc = sincospi_eval_dd(x, backend);
    // sin result: sin_sin_k*cos(y) + sin_cos_k*sin(y).
    let s = backend.mul_add(sin_sin_k, sc.v_cos, backend.quick_mult(sin_cos_k, sc.v_sin));
    // cos result: cos_sin_k*cos(y) + cos_cos_k*sin(y).
    let c = backend.mul_add(cos_sin_k, sc.v_cos, backend.quick_mult(cos_cos_k, sc.v_sin));
    (s.to_f64(), c.to_f64())
}
/// Extended-precision evaluation of sin(pi*x) and cos(pi*x) used by
/// `f_fast_sinpi_dd`, which needs a double-double result rather than a
/// rounded f64. Mixed scheme: low-order tail via plain f64 `f_polyeval3`,
/// leading terms in double-double.
#[inline]
fn sincospi_eval_extended(x: f64) -> SinCos {
    let x2 = DoubleDouble::from_exact_mult(x, x);
    // f64 tail polynomial for sin (coefficients for x^5, x^7, x^9 terms
    // after the two double-double leading steps below).
    let sin_lop = f_polyeval3(
        x2.hi,
        f64::from_bits(0x400466bc67763662),
        f64::from_bits(0xbfe32d2cce5aad86),
        f64::from_bits(0x3fb5077099a1f35b),
    );
    let mut v_sin = DoubleDouble::mul_f64_add(
        x2,
        sin_lop,
        DoubleDouble::from_bit_pair((0x3cb0553d6ee5e8ec, 0xc014abbce625be53)),
    );
    // Final sin step adds pi (hi word 0x400921fb54442d18) and multiplies by x.
    v_sin = DoubleDouble::mul_add(
        x2,
        v_sin,
        DoubleDouble::from_bit_pair((0x3ca1a626330dd130, 0x400921fb54442d18)),
    );
    v_sin = DoubleDouble::quick_mult_f64(v_sin, x);
    // f64 tail polynomial for cos, then two double-double steps ending in +1.
    let p = f_polyeval3(
        x2.hi,
        f64::from_bits(0x40103c1f081b5abf),
        f64::from_bits(0xbff55d3c7e2edd89),
        f64::from_bits(0x3fce1f2fd9d79484),
    );
    let mut v_cos = DoubleDouble::mul_f64_add(
        x2,
        p,
        DoubleDouble::from_bit_pair((0xbcb69236a9b3ed73, 0xc013bd3cc9be45de)),
    );
    v_cos = DoubleDouble::mul_add_f64(x2, v_cos, f64::from_bits(0x3ff0000000000000));
    // Renormalize both results so hi/lo are a canonical double-double.
    SinCos {
        v_sin: DoubleDouble::from_exact_add(v_sin.hi, v_sin.lo),
        v_cos: DoubleDouble::from_exact_add(v_cos.hi, v_cos.lo),
        err: 0.,
    }
}
/// Computes sin(pi*x) as a double-double, for crate-internal consumers that
/// need more than f64 precision. Mirrors the structure of `sinpi_gen_impl`
/// but always uses the portable primitives and skips the Ziv test.
///
/// NOTE(review): unlike `sinpi_gen_impl` there is no NaN/Inf screening here;
/// presumably callers guarantee finite input — TODO confirm.
pub(crate) fn f_fast_sinpi_dd(x: f64) -> DoubleDouble {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // sin(pi*0) = 0 exactly.
        return DoubleDouble::new(0., 0.);
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa with implicit bit
    let sgn: i64 = (ix as i64) >> 63; // all-ones if negative
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); // signed mantissa
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        // Large |x|: check whether x is (close to) an integer, where
        // sin(pi*x) is a signed zero.
        s = -s - 1;
        if s > 10 {
            // |x| so large that x is always an even-spaced integer multiple.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            // Fractional part (in 1/64ths plus guard bits) is zero.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
    }
    if ax <= 0x3fa2000000000000u64 {
        // |x| <= ~0.0352: evaluate the polynomial directly, no reduction.
        const PI: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0x3ca1a62633145c07),
            f64::from_bits(0x400921fb54442d18),
        );
        if ax < 0x3c90000000000000 {
            // |x| < 2^-54: sin(pi*x) ~= pi*x to double-double accuracy.
            if ax < 0x0350000000000000 {
                // Deep subnormal range: scale by 2^106 (0x469...) to avoid
                // underflow, multiply by pi, then scale back with 2^-106
                // (0x395...), reconstructing the low part with a fused op.
                let t = x * f64::from_bits(0x4690000000000000);
                let z = DoubleDouble::quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000);
                let rt = rs * f64::from_bits(0x4690000000000000);
                return DoubleDouble::new(
                    0.,
                    dyad_fmla((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs),
                );
            }
            let z = DoubleDouble::quick_mult_f64(PI, x);
            return z;
        }
        // f64 tail coefficients for the sin polynomial (x^7..x^13 region).
        const C: [u64; 4] = [
            0xbfe32d2cce62bd85,
            0x3fb50783487eb73d,
            0xbf7e3074f120ad1f,
            0x3f3e8d9011340e5a,
        ];
        let x2 = DoubleDouble::from_exact_mult(x, x);
        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a626331457a4, 0x400921fb54442d18));
        let p = f_polyeval4(
            x2.hi,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
        );
        // Two double-double Horner steps, then the final +pi and *x.
        let mut r = DoubleDouble::mul_f64_add(
            x2,
            p,
            DoubleDouble::from_bit_pair((0xbc96dd7ae221e58c, 0x400466bc6775aae2)),
        );
        r = DoubleDouble::mul_add(
            x2,
            r,
            DoubleDouble::from_bit_pair((0x3cb05511c8a6c478, 0xc014abbce625be53)),
        );
        r = DoubleDouble::mul_add(r, x2, C_PI);
        r = DoubleDouble::quick_mult_f64(r, x);
        let k = DoubleDouble::from_exact_add(r.hi, r.lo);
        return k;
    }
    // Exact half-integer / integer detection for mid-range |x|.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // x is an exact integer: sin is a signed zero.
            return DoubleDouble::new(0., f64::copysign(0.0, x));
        }
        // x is an exact half-odd-integer: sin is +-1 depending on which half.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return DoubleDouble::new(
            0.,
            if t == 0 {
                f64::copysign(1.0, x)
            } else {
                -f64::copysign(1.0, x)
            },
        );
    }
    // General path: reduce to x = k/64 + y and recombine with the
    // sin/cos(k*pi/64) table via the angle-addition formula.
    let (y, k) = reduce_pi_64(x);
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    // k+32 shifts by a quarter period, turning the sin table into cos.
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let r_sincos = sincospi_eval_extended(y);
    let sin_k_cos_y = DoubleDouble::quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = DoubleDouble::quick_mult(cos_k, r_sincos.v_sin);
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    DoubleDouble::from_exact_add(rr.hi, rr.lo)
}
/// Generic sin(pi*x) over any backend. Fast path with a Ziv rounding test;
/// falls back to the double-double slow path when inconclusive.
///
/// Special cases (per the branches below): +-0 -> +-0; NaN/Inf -> NaN;
/// exact integers -> signed zero; exact half-odd-integers -> +-1.
#[inline(always)]
fn sinpi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // sin(pi*+-0) = +-0.
        return x;
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa + implicit bit
    let sgn: i64 = (ix as i64) >> 63;
    let m = ((m0 as i64) ^ sgn).wrapping_sub(sgn); // signed mantissa
    let mut s: i32 = 1063i32.wrapping_sub(e);
    if s < 0 {
        if e == 0x7ff {
            // NaN or infinity.
            if (ix << 12) == 0 {
                // Infinity: domain error -> NaN.
                return f64::NAN;
            }
            // Propagate (and quiet) the NaN.
            return x + x;
        }
        // Large |x|: detect integer arguments, where sin(pi*x) is +-0.
        s = -s - 1;
        if s > 10 {
            return f64::copysign(0.0, x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        if (iq & 2047) == 0 {
            return f64::copysign(0.0, x);
        }
    }
    if ax <= 0x3fa2000000000000u64 {
        // Small |x| (<= ~0.0352): direct polynomial, no argument reduction.
        const PI: DoubleDouble = DoubleDouble::new(
            f64::from_bits(0x3ca1a62633145c07),
            f64::from_bits(0x400921fb54442d18),
        );
        if ax < 0x3c90000000000000 {
            // |x| < 2^-54: sin(pi*x) ~= pi*x.
            if ax < 0x0350000000000000 {
                // Deep subnormals: scale by 2^106 up/down around the multiply
                // to avoid intermediate underflow (see f_fast_sinpi_dd).
                let t = x * f64::from_bits(0x4690000000000000);
                let z = backend.quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000);
                let rt = rs * f64::from_bits(0x4690000000000000);
                return backend.dyad_fma(
                    (z.hi - rt) + z.lo,
                    f64::from_bits(0x3950000000000000),
                    rs,
                );
            }
            let z = backend.quick_mult_f64(PI, x);
            return z.to_f64();
        }
        let x2 = x * x;
        let x3 = x2 * x;
        let x4 = x2 * x2;
        // Ziv error bound, scaled by x since sin is odd.
        let eps = x * backend.fma(
            x2,
            f64::from_bits(0x3d00000000000000),
            f64::from_bits(0x3bd0000000000000),
        );
        const C: [u64; 4] = [
            0xc014abbce625be51,
            0x400466bc67754b46,
            0xbfe32d2cc12a51f4,
            0x3fb5060540058476,
        ];
        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
        // z ~= pi*x in double-double; fold the polynomial tail into z.lo.
        let mut z = backend.quick_mult_f64(C_PI, x);
        let zl0 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
        let zl1 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
        // Ziv test: accept only if both error-bound roundings agree.
        let lb = z.hi + (z.lo - eps);
        let ub = z.hi + (z.lo + eps);
        if lb == ub {
            return lb;
        }
        return as_sinpi_zero(x, &backend);
    }
    // Exact integer / half-odd-integer detection for mid-range |x|.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Exact integer: signed zero.
            return f64::copysign(0.0, x);
        }
        // Exact half-odd-integer: +-1 by which half of the period.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return if t == 0 {
            f64::copysign(1.0, x)
        } else {
            -f64::copysign(1.0, x)
        };
    }
    // General path: x = k/64 + y, then sin(pi*x) =
    // sin(k*pi/64)*cos(pi*y) + cos(k*pi/64)*sin(pi*y) via the table.
    let (y, k) = backend.arg_reduce_pi_64(x);
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let r_sincos = sincospi_eval(y, &backend);
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
    let mut rr = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    // Ziv test on the recombined result.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    // Inconclusive: redo in full double-double precision.
    sinpi_dd(y, sin_k, cos_k, &backend)
}
/// x86/x86_64 entry point that monomorphizes `sinpi_gen_impl` with the FMA
/// backend under `#[target_feature]`, letting the compiler emit AVX/FMA code.
///
/// # Safety
/// Caller must ensure the CPU supports AVX and FMA (checked at dispatch time
/// in `f_sinpi` via `is_x86_feature_detected!`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sinpi_fma_impl(x: f64) -> f64 {
    sinpi_gen_impl(x, FmaSinCosPiBackend {})
}
/// Public sin(pi*x). On x86/x86_64 the implementation is chosen once at first
/// call (AVX+FMA if the CPU has them, portable otherwise) and cached in a
/// `OnceLock`; on other architectures the portable backend is used directly.
pub fn f_sinpi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        sinpi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Portable fallback, named so it coerces to the fn-pointer slot.
        fn sinpi_generic(x: f64) -> f64 {
            sinpi_gen_impl(x, GenSinCosPiBackend {})
        }
        static DISPATCH: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let selected = DISPATCH.get_or_init(|| {
            let has_fma = std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma");
            if has_fma { sinpi_fma_impl } else { sinpi_generic }
        });
        // SAFETY: sinpi_fma_impl is only stored after the feature check
        // above succeeded; sinpi_generic is always safe.
        unsafe { selected(x) }
    }
}
/// Generic cos(pi*x) over any backend. Mirrors `sinpi_gen_impl`: bit-level
/// screening, a small-|x| fast polynomial with Ziv test, exact-zero
/// detection at half-odd-integers, and a table-based general path.
///
/// Special cases: +-0 -> 1; NaN/Inf -> NaN; huge |x| -> 1 (always an even
/// integer multiple); exact half-odd-integers -> 0.
#[inline(always)]
fn cospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> f64 {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // cos(0) = 1.
        return 1.0;
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64; // mantissa
    let mut s = 1063i32.wrapping_sub(e);
    if s < 0 {
        if e == 0x7ff {
            // NaN or infinity.
            if ix.wrapping_shl(12) == 0 {
                return f64::NAN;
            }
            return x + x;
        }
        s = -s - 1;
        if s > 11 {
            // |x| huge: always an even integer, cos(pi*x) = 1.
            return 1.0;
        }
        // +1024 shifts the half-odd-integer case into the zero test below.
        let iq: u64 = (m as u64).wrapping_shl(s as u32).wrapping_add(1024);
        if (iq & 2047) == 0 {
            // x is an exact half-odd-integer: cos(pi*x) = 0.
            return 0.0;
        }
    }
    if ax <= 0x3f30000000000000u64 {
        // |x| <= 2^-12: direct polynomial.
        if ax <= 0x3e2ccf6429be6621u64 {
            // So small that cos(pi*x) rounds to just below 1.
            return 1.0 - f64::from_bits(0x3c80000000000000);
        }
        let x2 = x * x;
        let x4 = x2 * x2;
        let eps = x2 * f64::from_bits(0x3cfa000000000000); // Ziv error bound
        const C: [u64; 4] = [
            0xc013bd3cc9be45de,
            0x40103c1f081b5ac4,
            0xbff55d3c7ff79b60,
            0x3fd24c7b6f7d0690,
        ];
        // Estrin-like split: two FMA pairs, then combine with x4.
        let p0 = backend.fma(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
        let p1 = backend.fma(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
        let p = x2 * backend.fma(x4, p0, p1);
        // Ziv test: accept only if both roundings agree.
        let lb = (p - eps) + 1.;
        let ub = (p + eps) + 1.;
        if lb == ub {
            return lb;
        }
        return as_cospi_zero(x, &backend);
    }
    // Exact half-odd-integer detection for mid-range |x|.
    let si: i32 = e.wrapping_sub(1011);
    if si >= 0 && ((m as u64).wrapping_shl(si as u32) ^ 0x8000000000000000u64) == 0 {
        return 0.0;
    }
    // General path: cos(pi*x) = cos(k*pi/64)*cos(pi*y) - sin(k*pi/64)*sin(pi*y).
    // The k+64 index yields -sin(k*pi/64) from the table (half-period shift).
    let (y, k) = backend.arg_reduce_pi_64(x);
    let msin_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(64) & 127) as usize],
    );
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let r_sincos = sincospi_eval(y, &backend);
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
    let cos_k_msin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
    let mut rr = DoubleDouble::from_exact_add(cos_k_cos_y.hi, cos_k_msin_y.hi);
    rr.lo += cos_k_cos_y.lo + cos_k_msin_y.lo;
    // Ziv test on the recombined result.
    let ub = rr.hi + (rr.lo + r_sincos.err);
    let lb = rr.hi + (rr.lo - r_sincos.err);
    if ub == lb {
        return rr.to_f64();
    }
    // Slow path: sinpi_dd with (cos_k, msin_k) computes
    // cos_k*cos(y) + msin_k*sin(y), i.e. the cosine recombination.
    sinpi_dd(y, cos_k, msin_k, &backend)
}
/// x86/x86_64 entry point monomorphizing `cospi_gen_impl` with the FMA
/// backend under `#[target_feature]`.
///
/// # Safety
/// Caller must ensure the CPU supports AVX and FMA (checked in `f_cospi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn cospi_fma_impl(x: f64) -> f64 {
    cospi_gen_impl(x, FmaSinCosPiBackend {})
}
/// Public cos(pi*x). Same one-time CPU-feature dispatch scheme as `f_sinpi`:
/// the chosen implementation is cached in a `OnceLock` on x86/x86_64, and the
/// portable backend is used directly elsewhere.
pub fn f_cospi(x: f64) -> f64 {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        cospi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Portable fallback, named so it coerces to the fn-pointer slot.
        fn cospi_generic(x: f64) -> f64 {
            cospi_gen_impl(x, GenSinCosPiBackend {})
        }
        static DISPATCH: OnceLock<unsafe fn(f64) -> f64> = OnceLock::new();
        let selected = DISPATCH.get_or_init(|| {
            let has_fma = std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma");
            if has_fma { cospi_fma_impl } else { cospi_generic }
        });
        // SAFETY: cospi_fma_impl is only stored after the feature check
        // above succeeded; cospi_generic is always safe.
        unsafe { selected(x) }
    }
}
/// Generic simultaneous (sin(pi*x), cos(pi*x)) over any backend. Combines the
/// screening and fast/slow paths of `sinpi_gen_impl` and `cospi_gen_impl`,
/// sharing one argument reduction and one polynomial evaluation.
#[inline(always)]
fn sincospi_gen_impl<B: SinCosPiBackend>(x: f64, backend: B) -> (f64, f64) {
    let ix = x.to_bits();
    let ax = ix & 0x7fff_ffff_ffff_ffff; // |x| bits
    if ax == 0 {
        // (sin, cos) at +-0: (+-0, 1).
        return (x, 1.0);
    }
    let e: i32 = (ax >> 52) as i32; // biased exponent
    let m0 = (ix & 0x000fffffffffffff) | (1u64 << 52); // mantissa + implicit bit
    let m: i64 = ((ix & 0x000fffffffffffff) | (1u64 << 52)) as i64; // same, signed slot
    let mut s = 1063i32.wrapping_sub(e);
    if s < 0 {
        if e == 0x7ff {
            // NaN or infinity.
            if ix.wrapping_shl(12) == 0 {
                return (f64::NAN, f64::NAN);
            }
            return (x + x, x + x);
        }
        s = -s - 1;
        if s > 10 {
            // |x| huge: x is an integer; sin is +-0, cos is +-1 by parity.
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (f64::copysign(0.0, x), cos_x);
        }
        let iq: u64 = (m as u64).wrapping_shl(s as u32);
        let sin_zero = (iq & 2047) == 0; // x is an exact integer
        let cos_zero = ((m as u64).wrapping_shl(s as u32).wrapping_add(1024) & 2047) == 0; // half-odd
        if sin_zero && cos_zero {
            // Both flags set should be unreachable (integer and half-odd
            // integer are mutually exclusive); falls through to the general
            // path — TODO confirm this branch is intentional dead code.
        } else if sin_zero {
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (0.0, cos_x);
        } else if cos_zero {
            // Half-odd-integer: cos is 0, sin is +-1 by which half.
            let si = e.wrapping_sub(1011);
            let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
            return if t == 0 {
                (f64::copysign(1.0, x), 0.0)
            } else {
                (-f64::copysign(1.0, x), 0.0)
            };
        }
    }
    if ax <= 0x3f30000000000000u64 {
        // |x| <= 2^-12: both values from direct polynomials.
        if ax <= 0x3c90000000000000u64 {
            // |x| < 2^-54: sin ~= pi*x (with the subnormal scaling trick),
            // cos rounds to just below 1.
            const PI: DoubleDouble = DoubleDouble::new(
                f64::from_bits(0x3ca1a62633145c07),
                f64::from_bits(0x400921fb54442d18),
            );
            let sin_x = if ax < 0x0350000000000000 {
                // Scale by 2^106 around the multiply to avoid underflow.
                let t = x * f64::from_bits(0x4690000000000000);
                let z = backend.quick_mult_f64(PI, t);
                let r = z.to_f64();
                let rs = r * f64::from_bits(0x3950000000000000);
                let rt = rs * f64::from_bits(0x4690000000000000);
                backend.dyad_fma((z.hi - rt) + z.lo, f64::from_bits(0x3950000000000000), rs)
            } else {
                let z = backend.quick_mult_f64(PI, x);
                z.to_f64()
            };
            return (sin_x, 1.0 - f64::from_bits(0x3c80000000000000));
        }
        // cos branch: same polynomial + Ziv test as cospi_gen_impl.
        let x2 = x * x;
        let x4 = x2 * x2;
        let cos_eps = x2 * f64::from_bits(0x3cfa000000000000);
        const COS_C: [u64; 4] = [
            0xc013bd3cc9be45de,
            0x40103c1f081b5ac4,
            0xbff55d3c7ff79b60,
            0x3fd24c7b6f7d0690,
        ];
        let p0 = backend.fma(x2, f64::from_bits(COS_C[3]), f64::from_bits(COS_C[2]));
        let p1 = backend.fma(x2, f64::from_bits(COS_C[1]), f64::from_bits(COS_C[0]));
        let p = x2 * backend.fma(x4, p0, p1);
        let cos_lb = (p - cos_eps) + 1.;
        let cos_ub = (p + cos_eps) + 1.;
        let cos_x = if cos_lb == cos_ub {
            cos_lb
        } else {
            as_cospi_zero(x, &backend)
        };
        // sin branch: same polynomial + Ziv test as sinpi_gen_impl.
        const SIN_C: [u64; 4] = [
            0xc014abbce625be51,
            0x400466bc67754b46,
            0xbfe32d2cc12a51f4,
            0x3fb5060540058476,
        ];
        const C_PI: DoubleDouble =
            DoubleDouble::from_bit_pair((0x3ca1a67088eb1a46, 0x400921fb54442d18));
        let mut z = backend.quick_mult_f64(C_PI, x);
        let x3 = x2 * x;
        let zl0 = backend.fma(x2, f64::from_bits(SIN_C[1]), f64::from_bits(SIN_C[0]));
        let zl1 = backend.fma(x2, f64::from_bits(SIN_C[3]), f64::from_bits(SIN_C[2]));
        let sin_eps = x * backend.fma(
            x2,
            f64::from_bits(0x3d00000000000000),
            f64::from_bits(0x3bd0000000000000),
        );
        z.lo = backend.fma(x3, backend.fma(x4, zl1, zl0), z.lo);
        let sin_lb = z.hi + (z.lo - sin_eps);
        let sin_ub = z.hi + (z.lo + sin_eps);
        let sin_x = if sin_lb == sin_ub {
            sin_lb
        } else {
            as_sinpi_zero(x, &backend)
        };
        return (sin_x, cos_x);
    }
    // Exact integer / half-odd-integer detection for mid-range |x|.
    let si = e.wrapping_sub(1011);
    if si >= 0 && (m0.wrapping_shl(si.wrapping_add(1) as u32)) == 0 {
        if (m0.wrapping_shl(si as u32)) == 0 {
            // Exact integer: sin is +-0, cos is +-1 by parity.
            static CF: [f64; 2] = [1., -1.];
            let is_odd = backend.odd_integer(f64::from_bits(ax));
            let cos_x = CF[is_odd as usize];
            return (f64::copysign(0.0, x), cos_x);
        }
        // Exact half-odd-integer: sin is +-1, cos is 0.
        let t = (m0.wrapping_shl((si - 1) as u32)) >> 63;
        return if t == 0 {
            (f64::copysign(1.0, x), 0.0)
        } else {
            (-f64::copysign(1.0, x), 0.0)
        };
    }
    // General path: one reduction, one polynomial evaluation, two
    // angle-addition recombinations (sin and cos).
    let (y, k) = backend.arg_reduce_pi_64(x);
    let sin_k = DoubleDouble::from_bit_pair(SINPI_K_PI_OVER_64[((k as u64) & 127) as usize]);
    let cos_k = DoubleDouble::from_bit_pair(
        SINPI_K_PI_OVER_64[((k as u64).wrapping_add(32) & 127) as usize],
    );
    let msin_k = -sin_k;
    let r_sincos = sincospi_eval(y, &backend);
    let sin_k_cos_y = backend.quick_mult(sin_k, r_sincos.v_cos);
    let cos_k_sin_y = backend.quick_mult(cos_k, r_sincos.v_sin);
    let cos_k_cos_y = backend.quick_mult(r_sincos.v_cos, cos_k);
    let msin_k_sin_y = backend.quick_mult(r_sincos.v_sin, msin_k);
    let mut rr_sin = DoubleDouble::from_exact_add(sin_k_cos_y.hi, cos_k_sin_y.hi);
    rr_sin.lo += sin_k_cos_y.lo + cos_k_sin_y.lo;
    let sin_ub = rr_sin.hi + (rr_sin.lo + r_sincos.err);
    let sin_lb = rr_sin.hi + (rr_sin.lo - r_sincos.err);
    let mut rr_cos = DoubleDouble::from_exact_add(cos_k_cos_y.hi, msin_k_sin_y.hi);
    rr_cos.lo += cos_k_cos_y.lo + msin_k_sin_y.lo;
    let cos_ub = rr_cos.hi + (rr_cos.lo + r_sincos.err);
    let cos_lb = rr_cos.hi + (rr_cos.lo - r_sincos.err);
    // Ziv test: both results must be unambiguous to take the fast path.
    if sin_ub == sin_lb && cos_lb == cos_ub {
        return (rr_sin.to_f64(), rr_cos.to_f64());
    }
    // Slow path recomputes both in full double-double precision.
    sincospi_dd(y, sin_k, cos_k, cos_k, msin_k, &backend)
}
/// x86/x86_64 entry point monomorphizing `sincospi_gen_impl` with the FMA
/// backend under `#[target_feature]`.
///
/// # Safety
/// Caller must ensure the CPU supports AVX and FMA (checked in `f_sincospi`).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx", enable = "fma")]
unsafe fn sincospi_fma_impl(x: f64) -> (f64, f64) {
    sincospi_gen_impl(x, FmaSinCosPiBackend {})
}
/// Public simultaneous (sin(pi*x), cos(pi*x)). Same one-time CPU-feature
/// dispatch scheme as `f_sinpi`/`f_cospi`, cached in a `OnceLock` on
/// x86/x86_64 and portable elsewhere.
pub fn f_sincospi(x: f64) -> (f64, f64) {
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
        sincospi_gen_impl(x, GenSinCosPiBackend {})
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        use std::sync::OnceLock;
        // Portable fallback, named so it coerces to the fn-pointer slot.
        fn sincospi_generic(x: f64) -> (f64, f64) {
            sincospi_gen_impl(x, GenSinCosPiBackend {})
        }
        static DISPATCH: OnceLock<unsafe fn(f64) -> (f64, f64)> = OnceLock::new();
        let selected = DISPATCH.get_or_init(|| {
            let has_fma = std::arch::is_x86_feature_detected!("avx")
                && std::arch::is_x86_feature_detected!("fma");
            if has_fma { sincospi_fma_impl } else { sincospi_generic }
        });
        // SAFETY: sincospi_fma_impl is only stored after the feature check
        // above succeeded; sincospi_generic is always safe.
        unsafe { selected(x) }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Spot checks across the code paths of f_sinpi: huge integers (signed
    /// zeros), subnormal/tiny inputs, near-half-integers, the small-|x|
    /// polynomial, the table path, and NaN/Inf handling. Expected values are
    /// exact f64 results — presumably generated against a high-precision
    /// reference; TODO confirm their provenance.
    #[test]
    fn test_sinpi() {
        assert_eq!(f_sinpi(262143.50006870925), -0.9999999767029883);
        assert_eq!(f_sinpi(7124076477593855.), 0.);
        assert_eq!(f_sinpi(-11235582092889474000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.), -0.);
        assert_eq!(f_sinpi(-2.7430620343968443e303), -0.0);
        assert_eq!(f_sinpi(0.00003195557007273919), 0.00010039138401316004);
        assert_eq!(f_sinpi(-0.038357843137253766), -0.12021328061499763);
        assert_eq!(f_sinpi(1.0156097449358867), -0.04901980680173724);
        assert_eq!(f_sinpi(74.8593852519989), 0.42752597787896457);
        assert_eq!(f_sinpi(0.500091552734375), 0.9999999586369661);
        assert_eq!(f_sinpi(0.5307886532952182), 0.9953257438106751);
        assert_eq!(f_sinpi(3.1415926535897936), -0.43030121700009316);
        assert_eq!(f_sinpi(-0.5305172747685276), -0.9954077178320563);
        assert_eq!(f_sinpi(-0.03723630312089732), -0.1167146713267927);
        // Deep-subnormal scaling path.
        assert_eq!(
            f_sinpi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022946074000077123),
            0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007208721750737005
        );
        assert_eq!(
            f_sinpi(0.000000000000000000000000000000000000007413093439574428),
            2.3288919890141717e-38
        );
        assert_eq!(f_sinpi(0.0031909299901270445), 0.0100244343161398578);
        assert_eq!(f_sinpi(0.11909245901270445), 0.36547215190661003);
        assert_eq!(f_sinpi(0.99909245901270445), 0.0028511202357662186);
        // Domain errors and NaN propagation.
        assert!(f_sinpi(f64::INFINITY).is_nan());
        assert!(f_sinpi(f64::NEG_INFINITY).is_nan());
        assert!(f_sinpi(f64::NAN).is_nan());
    }

    /// Consistency checks: f_sincospi must agree exactly with the separate
    /// f_sinpi/f_cospi results on inputs covering the fast, slow, exact-zero,
    /// half-integer, huge, and tiny paths.
    #[test]
    fn test_sincospi() {
        let v0 = f_sincospi(1.0156097449358867);
        assert_eq!(v0.0, f_sinpi(1.0156097449358867));
        assert_eq!(v0.1, f_cospi(1.0156097449358867));
        let v1 = f_sincospi(4503599627370496.);
        assert_eq!(v1.0, f_sinpi(4503599627370496.));
        assert_eq!(v1.1, f_cospi(4503599627370496.));
        let v1 = f_sincospi(-108.);
        assert_eq!(v1.0, f_sinpi(-108.));
        assert_eq!(v1.1, f_cospi(-108.));
        let v1 = f_sincospi(3.);
        assert_eq!(v1.0, f_sinpi(3.));
        assert_eq!(v1.1, f_cospi(3.));
        let v1 = f_sincospi(13.5);
        assert_eq!(v1.0, f_sinpi(13.5));
        assert_eq!(v1.1, f_cospi(13.5));
        let v1 = f_sincospi(7124076477593855.);
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
        assert_eq!(v1.1, f_cospi(7124076477593855.));
        let v1 = f_sincospi(2533419148247186.5);
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
        assert_eq!(v1.1, f_cospi(2533419148247186.5));
        let v1 = f_sincospi(2.2250653705240375E-308);
        assert_eq!(v1.0, f_sinpi(2.2250653705240375E-308));
        assert_eq!(v1.1, f_cospi(2.2250653705240375E-308));
        let v1 = f_sincospi(2533420818956351.);
        assert_eq!(v1.0, f_sinpi(2533420818956351.));
        assert_eq!(v1.1, f_cospi(2533420818956351.));
        let v1 = f_sincospi(2533822406803233.5);
        assert_eq!(v1.0, f_sinpi(2533822406803233.5));
        assert_eq!(v1.1, f_cospi(2533822406803233.5));
        let v1 = f_sincospi(-3040685725640478.5);
        assert_eq!(v1.0, f_sinpi(-3040685725640478.5));
        assert_eq!(v1.1, f_cospi(-3040685725640478.5));
        let v1 = f_sincospi(2533419148247186.5);
        assert_eq!(v1.0, f_sinpi(2533419148247186.5));
        assert_eq!(v1.1, f_cospi(2533419148247186.5));
        let v1 = f_sincospi(2533420819267583.5);
        assert_eq!(v1.0, f_sinpi(2533420819267583.5));
        assert_eq!(v1.1, f_cospi(2533420819267583.5));
        let v1 = f_sincospi(6979704728846336.);
        assert_eq!(v1.0, f_sinpi(6979704728846336.));
        assert_eq!(v1.1, f_cospi(6979704728846336.));
        let v1 = f_sincospi(7124076477593855.);
        assert_eq!(v1.0, f_sinpi(7124076477593855.));
        assert_eq!(v1.1, f_cospi(7124076477593855.));
        let v1 = f_sincospi(-0.00000000002728839192371484);
        assert_eq!(v1.0, f_sinpi(-0.00000000002728839192371484));
        assert_eq!(v1.1, f_cospi(-0.00000000002728839192371484));
        let v1 = f_sincospi(0.00002465398569495569);
        assert_eq!(v1.0, f_sinpi(0.00002465398569495569));
        assert_eq!(v1.1, f_cospi(0.00002465398569495569));
    }

    /// Spot checks for f_cospi, including NaN/Inf handling.
    #[test]
    fn test_cospi() {
        assert_eq!(0.9999497540959953, f_cospi(0.0031909299901270445));
        assert_eq!(0.9308216542079669, f_cospi(0.11909299901270445));
        assert_eq!(-0.1536194873288318, f_cospi(0.54909299901270445));
        assert!(f_cospi(f64::INFINITY).is_nan());
        assert!(f_cospi(f64::NEG_INFINITY).is_nan());
        assert!(f_cospi(f64::NAN).is_nan());
    }
}