use crate::bits::set_exponent_f64;
use crate::common::{dd_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::rounding::CpuRound;
use crate::sincos_reduce_tables::ONE_TWENTY_EIGHT_OVER_PI;
#[derive(Debug)]
pub(crate) struct AngleReduced {
pub(crate) angle: DoubleDouble,
}
#[derive(Default)]
pub(crate) struct LargeArgumentReduction {
x_reduced: f64,
idx: u64,
y_hi: f64,
y_lo: f64,
y_mid: DoubleDouble,
}
impl LargeArgumentReduction {
#[cold]
pub(crate) fn accurate(&self) -> DyadicFloat128 {
const PI_OVER_128_F128: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -133,
mantissa: 0xc90f_daa2_2168_c234_c4c6_628b_80dc_1cd1_u128,
};
let one_pi_rot = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
let y_lo_0 = DyadicFloat128::new_from_f64(self.x_reduced * f64::from_bits(one_pi_rot.3));
let y_lo_1 = DyadicFloat128::new_from_f64(self.y_lo) + y_lo_0;
let y_mid_f128 = DyadicFloat128::new_from_f64(self.y_mid.lo) + y_lo_1;
let y_hi_f128 =
DyadicFloat128::new_from_f64(self.y_hi) + DyadicFloat128::new_from_f64(self.y_mid.hi);
let y = y_hi_f128 + y_mid_f128;
y * PI_OVER_128_F128
}
pub(crate) fn reduce(&mut self, x: f64) -> (u64, DoubleDouble) {
const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
let mut xbits = x.to_bits();
let x_e = ((x.to_bits() >> 52) & 0x7ff) as i64;
let x_e_m62 = x_e.wrapping_sub(E_BIAS as i64 + 62);
self.idx = (x_e_m62 >> 4).wrapping_add(3) as u64;
xbits = set_exponent_f64(
xbits,
(x_e_m62 & 15)
.wrapping_add(E_BIAS as i64)
.wrapping_add(62i64) as u64,
);
self.x_reduced = f64::from_bits(xbits);
let one_pi = ONE_TWENTY_EIGHT_OVER_PI[self.idx as usize];
let ph = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.0));
let pm = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.1));
let pl = DoubleDouble::from_exact_mult(self.x_reduced, f64::from_bits(one_pi.2));
let sum_hi = ph.lo + pm.hi;
let kd = sum_hi.cpu_round();
self.y_hi = (ph.lo - kd) + pm.hi; self.y_mid = DoubleDouble::from_exact_add(pm.lo, pl.hi);
self.y_lo = pl.lo;
let y_l = dd_fmla(self.x_reduced, f64::from_bits(one_pi.3), self.y_lo);
let mut y = DoubleDouble::from_exact_add(self.y_hi, self.y_mid.hi);
y.lo += self.y_mid.lo + y_l;
const PI_OVER_128_DD: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3c31a62633145c07),
f64::from_bits(0x3f9921fb54442d18),
);
let u = DoubleDouble::quick_mult(y, PI_OVER_128_DD);
(kd as i64 as u64, u)
}
}
static INVPI_2_64: [u64; 20] = [
0x28be60db9391054a,
0x7f09d5f47d4d3770,
0x36d8a5664f10e410,
0x7f9458eaf7aef158,
0x6dc91b8e909374b8,
0x1924bba82746487,
0x3f877ac72c4a69cf,
0xba208d7d4baed121,
0x3a671c09ad17df90,
0x4e64758e60d4ce7d,
0x272117e2ef7e4a0e,
0xc7fe25fff7816603,
0xfbcbc462d6829b47,
0xdb4d9fb3c9f2c26d,
0xd3d18fd9a797fa8b,
0x5d49eeb1faf97c5e,
0xcf41ce7de294a4ba,
0x9afed7ec47e35742,
0x1580cc11bf1edaea,
0xfc33ef0826bd0d87,
];
#[inline]
fn create_dd(c1: u64, c0: u64) -> DoubleDouble {
let mut c1 = c1;
let mut c0 = c0;
if c1 != 0 {
let e = c1.leading_zeros();
if e != 0 {
c1 = (c1 << e) | (c0 >> (64 - e));
c0 = c0.wrapping_shl(e);
}
let f = 0x3fe - e;
let t_u = ((f as u64) << 52) | ((c1 << 1) >> 12);
let hi = f64::from_bits(t_u);
c0 = (c1 << 53) | (c0 >> 11);
let l = if c0 != 0 {
let g = c0.leading_zeros();
if (g) != 0 {
c0 = c0.wrapping_shl(g);
}
let t_u = (((f - 53 - g) as u64) << 52) | ((c0 << 1) >> 12);
f64::from_bits(t_u)
} else {
0.
};
DoubleDouble::new(l, hi)
} else if c0 != 0 {
let e = c0.leading_zeros();
let f = 0x3fe - 64 - e;
c0 = c0.wrapping_shl(e + 1);
let t_u = ((f as u64) << 52) | (c0 >> 12);
let hi = f64::from_bits(t_u);
c0 = c0.wrapping_shl(52);
let l = if c0 != 0 {
let g = c0.leading_zeros();
c0 = c0.wrapping_shl(g + 1);
let t_u = (((f - 64 - g) as u64) << 52) | (c0 >> 12);
f64::from_bits(t_u)
} else {
0.
};
DoubleDouble::new(l, hi)
} else {
DoubleDouble::default()
}
}
#[inline]
fn frac_2pi(x: f64) -> DoubleDouble {
if x <= f64::from_bits(0x401921fb54442d17)
{
const C: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc66b01ec5417056),
f64::from_bits(0x3fc45f306dc9c883),
);
let mut z = DoubleDouble::quick_mult_f64(C, x);
z.lo = f_fmla(C.lo, x, z.lo);
z
} else
{
let t = x.to_bits();
let mut e = ((t >> 52) & 0x7ff) as i32;
let m = (1u64 << 52) | (t & 0xfffffffffffffu64);
let mut c0: u64;
let mut c1: u64;
let mut c2: u64;
if e <= 1074
{
let mut u = m as u128 * INVPI_2_64[1] as u128;
c0 = u as u64;
c1 = (u >> 64) as u64;
u = m as u128 * INVPI_2_64[0] as u128;
c1 = c1.wrapping_add(u as u64);
c2 = (u >> 64) as u64 + (c1 < (u as u64)) as u64;
e = 1075 - e; } else
{
let i = (e - 1138 + 63) / 64; let mut u = m as u128 * INVPI_2_64[i as usize + 2] as u128;
c0 = u as u64;
c1 = (u >> 64) as u64;
u = m as u128 * INVPI_2_64[i as usize + 1] as u128;
c1 = c1.wrapping_add(u as u64);
c2 = (u >> 64) as u64 + ((c1) < (u as u64)) as u64;
u = m as u128 * INVPI_2_64[i as usize] as u128;
c2 = c2.wrapping_add(u as u64);
e = 1139 + (i << 6) - e; }
if e == 64 {
c0 = c1;
c1 = c2;
} else {
c0 = (c1 << (64 - e)) | c0 >> e;
c1 = (c2 << (64 - e)) | c1 >> e;
}
create_dd(c1, c0)
}
}
#[inline]
pub(crate) fn rem2pi_any(x: f64) -> AngleReduced {
const TWO_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0x3cb1a62633145c07),
f64::from_bits(0x401921fb54442d18),
);
let a = frac_2pi(x);
let z = DoubleDouble::quick_mult(a, TWO_PI);
AngleReduced { angle: z }
}
#[inline]
fn rem2pif_small(x: f32) -> f64 {
const ONE_OVER_PI_2: f64 = f64::from_bits(0x3fc45f306dc9c883);
const TWO_PI: [u64; 3] = [0x401921fb54440000, 0x3da68c234c4c0000, 0x3b498a2e03707345];
let dx = x as f64;
let kd = (dx * ONE_OVER_PI_2).cpu_round();
let mut y = dd_fmla(-kd, f64::from_bits(TWO_PI[0]), dx);
y = dd_fmla(-kd, f64::from_bits(TWO_PI[1]), y);
y = dd_fmla(-kd, f64::from_bits(TWO_PI[2]), y);
y
}
#[inline]
pub(crate) fn rem2pif_any(x: f32) -> f64 {
let x_abs = x.abs();
if x_abs.to_bits() < 0x5600_0000u32 {
rem2pif_small(x_abs)
} else {
let p = rem2pi_any(x_abs as f64);
p.angle.to_f64()
}
}
pub(crate) fn frac2pi_d128(x: DyadicFloat128) -> DyadicFloat128 {
let e = x.biased_exponent();
let mut fe = x;
if e <= 1
{
let mut x_hi = (x.mantissa >> 64) as u64;
let mut x_lo: u64;
let mut u: u128 = x_hi as u128 * INVPI_2_64[1] as u128;
let tiny: u64 = u as u64;
x_lo = (u >> 64) as u64;
u = x_hi as u128 * INVPI_2_64[0] as u128;
x_lo = x_lo.wrapping_add(u as u64);
x_hi = (u >> 64) as u64 + (x_lo < u as u64) as u64;
let mut e = x.exponent;
fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
fe.normalize();
e -= fe.exponent;
if (e) != 0 {
x_hi = (fe.mantissa >> 64) as u64;
x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
x_lo |= tiny >> (64 - e);
fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
}
return fe;
}
let i = (if e < 127 { 0 } else { (e - 127 + 64 - 1) / 64 }) as usize; let mut c: [u64; 5] = [0u64; 5];
let mut x_hi = (x.mantissa >> 64) as u64;
let mut x_lo: u64;
let mut u: u128 = x_hi as u128 * INVPI_2_64[i + 3] as u128; c[0] = u as u64;
c[1] = (u >> 64) as u64;
u = x_hi as u128 * INVPI_2_64[i + 2] as u128;
c[1] = c[1].wrapping_add(u as u64);
c[2] = (u >> 64) as u64 + (c[1] < u as u64) as u64;
u = x_hi as u128 * INVPI_2_64[i + 1] as u128;
c[2] = c[2].wrapping_add(u as u64);
c[3] = (u >> 64) as u64 + (c[2] < u as u64) as u64;
u = x_hi as u128 * INVPI_2_64[i] as u128;
c[3] = c[3].wrapping_add(u as u64);
c[4] = (u >> 64) as u64 + (c[3] < u as u64) as u64;
let f = e as i32 - 64 * i as i32;
let tiny;
if f < 64 {
x_hi = (c[4] << f) | (c[3] >> (64 - f));
x_lo = (c[3] << f) | (c[2] >> (64 - f));
tiny = (c[2] << f) | (c[1] >> (64 - f));
} else if f == 64 {
x_hi = c[3];
x_lo = c[2];
tiny = c[1];
} else
{
let g = f - 64;
u = x_hi as u128 * INVPI_2_64[i + 4] as u128; u >>= 64;
c[0] = c[0].wrapping_add(u as u64);
c[1] += (c[0] < u as u64) as u64;
c[2] += ((c[0] < u as u64) && c[1] == 0) as u64;
c[3] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0) as u64;
c[4] += ((c[0] < u as u64) && c[1] == 0 && c[2] == 0 && c[3] == 0) as u64;
x_hi = (c[3] << g) | (c[2] >> (64 - g));
x_lo = (c[2] << g) | (c[1] >> (64 - g));
tiny = (c[1] << g) | (c[0] >> (64 - g));
}
let mut fe = x;
fe.exponent = -127;
fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
fe.normalize();
let ze = fe.biased_exponent();
if ze < 0 {
x_hi = (fe.mantissa >> 64) as u64;
x_lo = (fe.mantissa & 0xffff_ffff_ffff_ffff) as u64;
x_lo |= tiny >> (64 + ze);
fe.mantissa = (x_hi as u128).wrapping_shl(64) | (x_lo as u128);
}
fe
}
pub(crate) fn rem2pi_f128(x: DyadicFloat128) -> DyadicFloat128 {
const TWO_PI: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -125,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
let frac2pi = frac2pi_d128(x);
TWO_PI * frac2pi
}