use super::*;
pub fn sin<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let r = d;
let mut ql;
if d.abs().simd_lt(F64x::TRIGRANGEMAX2).all() {
let dql = (d * F64x::FRAC_1_PI).round();
ql = dql.roundi();
d = dql.mla(-F64x::PI_A2, d);
d = dql.mla(-F64x::PI_B2, d);
} else if d.abs().simd_lt(F64x::TRIGRANGEMAX).all() {
let dqh = (d * (F64x::FRAC_1_PI / F64x::D1_24)).trunc();
let dqh = dqh * F64x::D1_24;
let dql = d.mul_sub(F64x::FRAC_1_PI, dqh).round();
ql = dql.roundi();
d = dqh.mla(-F64x::PI_A, d);
d = dql.mla(-F64x::PI_A, d);
d = dqh.mla(-F64x::PI_B, d);
d = dql.mla(-F64x::PI_B, d);
d = dqh.mla(-F64x::PI_C, d);
d = dql.mla(-F64x::PI_C, d);
d = (dqh + dql).mla(-F64x::PI_D, d);
} else {
let (mut ddidd, ddii) = rempi(d);
ql = ddii & Ix::splat(3);
ql = ql
+ ql
+ ddidd
.0
.simd_gt(F64x::ZERO)
.cast::<i32>()
.select(Ix::splat(2), Ix::splat(1));
ql >>= Ix::splat(2);
let o = (ddii & Ix::splat(1)).simd_eq(Ix::splat(1));
let mut x = Doubled::new(
F64x::splat(-crate::f64::D_PI.0 * 0.5).mul_sign(ddidd.0),
F64x::splat(-crate::f64::D_PI.1 * 0.5).mul_sign(ddidd.0),
);
x = ddidd + x;
ddidd = o.cast().select_doubled(x, ddidd);
d = F64x::from(ddidd);
d = F64x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
let s = d * d;
d = F64x::from_bits(
((ql & Ix::splat(1)).simd_eq(Ix::splat(1)).to_int().cast() & F64x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let s2 = s * s;
let s4 = s2 * s2;
let mut u = F64x::poly8(
s,
s2,
s4,
-7.972_559_550_090_378_688_919_52_e-18,
2.810_099_727_108_632_000_912_51_e-15,
-7.647_122_191_181_588_332_884_84_e-13,
1.605_904_306_056_645_016_290_54_e-10,
-2.505_210_837_635_020_458_107_55_e-8,
2.755_731_922_391_987_476_304_16_e-6,
-0.000_198_412_698_412_696_162_806_809,
0.008_333_333_333_333_329_748_238_15,
)
.mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808));
u = s * (u * d) + d;
r.is_neg_zero().select(r, u)
}
pub fn sin_deterministic<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let r = d;
let dql = (d * F64x::FRAC_1_PI).round();
let mut ql = dql.roundi();
d = dql.mla(-F64x::PI_A2, d);
d = dql.mla(-F64x::PI_B2, d);
let g = r.abs().simd_lt(F64x::TRIGRANGEMAX2);
if !g.all() {
let mut dqh = (r * F64x::FRAC_1_PI / F64x::D1_24).trunc();
dqh *= F64x::D1_24;
let dql = r.mul_sub(F64x::FRAC_1_PI, dqh).round();
let mut u = dqh.mla(-F64x::PI_A, r);
u = dql.mla(-F64x::PI_A, u);
u = dqh.mla(-F64x::PI_B, u);
u = dql.mla(-F64x::PI_B, u);
u = dqh.mla(-F64x::PI_C, u);
u = dql.mla(-F64x::PI_C, u);
u = (dqh + dql).mla(-F64x::PI_D, u);
ql = g.cast::<i32>().select(ql, dql.roundi());
d = g.select(d, u);
let g = r.abs().simd_lt(F64x::TRIGRANGEMAX);
if !g.all() {
let (mut ddidd, ddii) = rempi(r);
let mut ql2 = ddii & Ix::splat(3);
ql2 = ql2
+ ql2
+ ddidd
.0
.simd_gt(F64x::ZERO)
.cast::<i32>()
.select(Ix::splat(2), Ix::splat(1));
ql2 >>= Ix::splat(2);
let o = (ddii & Ix::splat(1)).simd_eq(Ix::splat(1));
let mut x = Doubled::new(
F64x::splat(-crate::f64::D_PI.0 * 0.5).mul_sign(ddidd.0),
F64x::splat(-crate::f64::D_PI.1 * 0.5).mul_sign(ddidd.0),
);
x = ddidd + x;
ddidd = o.cast().select_doubled(x, ddidd);
let u = F64x::from(ddidd);
ql = g.cast::<i32>().select(ql, ql2);
d = g.select(d, u);
d = F64x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
}
let s = d * d;
d = F64x::from_bits(
((ql & Ix::splat(1)).simd_eq(Ix::splat(1)).to_int().cast() & F64x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let s2 = s * s;
let s4 = s2 * s2;
let mut u = F64x::poly8(
s,
s2,
s4,
-7.972_559_550_090_378_688_919_52_e-18,
2.810_099_727_108_632_000_912_51_e-15,
-7.647_122_191_181_588_332_884_84_e-13,
1.605_904_306_056_645_016_290_54_e-10,
-2.505_210_837_635_020_458_107_55_e-8,
2.755_731_922_391_987_476_304_16_e-6,
-0.000_198_412_698_412_696_162_806_809,
0.008_333_333_333_333_329_748_238_15,
)
.mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808));
u = s * (u * d) + d;
r.is_neg_zero().select(r, u)
}
#[test]
fn test_sin() {
test_f_f::<2>(sin, rug::Float::sin, f64::MIN..=f64::MAX, 3.5);
test_f_f::<2>(sin_deterministic, rug::Float::sin, f64::MIN..=f64::MAX, 3.5);
}
pub fn cos<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let r = d;
let mut ql;
if d.abs().simd_lt(F64x::TRIGRANGEMAX2).all() {
let dql = F64x::splat(2.).mla(d.mla(F64x::FRAC_1_PI, F64x::splat(-0.5)).round(), F64x::ONE);
ql = dql.roundi();
d = dql.mla(-F64x::PI_A2 * F64x::HALF, d);
d = dql.mla(-F64x::PI_B2 * F64x::HALF, d);
} else if d.abs().simd_lt(F64x::TRIGRANGEMAX).all() {
let dqh = d
.mla(
F64x::FRAC_1_PI / F64x::D1_23,
-F64x::FRAC_1_PI / F64x::D1_24,
)
.trunc();
ql = (d * F64x::FRAC_1_PI + dqh.mla(-F64x::D1_23, F64x::splat(-0.5))).roundi();
let dqh = dqh * F64x::D1_24;
ql = ql + ql + Ix::splat(1);
let dql = ql.cast::<f64>();
d = dqh.mla(-F64x::PI_A * F64x::HALF, d);
d = dql.mla(-F64x::PI_A * F64x::HALF, d);
d = dqh.mla(-F64x::PI_B * F64x::HALF, d);
d = dql.mla(-F64x::PI_B * F64x::HALF, d);
d = dqh.mla(-F64x::PI_C * F64x::HALF, d);
d = dql.mla(-F64x::PI_C * F64x::HALF, d);
d = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, d);
} else {
let (mut ddidd, ddii) = rempi(d);
ql = ddii & Ix::splat(3);
ql = ql
+ ql
+ ddidd
.0
.simd_gt(F64x::ZERO)
.cast::<i32>()
.select(Ix::splat(8), Ix::splat(7));
ql >>= Ix::splat(1);
let o = (ddii & Ix::splat(1)).simd_eq(Ix::splat(0));
let y = ddidd
.0
.simd_gt(F64x::ZERO)
.select(F64x::ZERO, F64x::splat(-1.));
let mut x = Doubled::new(
F64x::splat(-crate::f64::D_PI.0 * 0.5).mul_sign(y),
F64x::splat(-crate::f64::D_PI.1 * 0.5).mul_sign(y),
);
x = ddidd + x;
ddidd = o.cast().select_doubled(x, ddidd);
d = F64x::from(ddidd);
d = F64x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
let s = d * d;
d = F64x::from_bits(
((ql & Ix::splat(2)).simd_eq(Ix::splat(0)).to_int().cast() & F64x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let s2 = s * s;
let s4 = s2 * s2;
let u = F64x::poly8(
s,
s2,
s4,
-7.972_559_550_090_378_688_919_52_e-18,
2.810_099_727_108_632_000_912_51_e-15,
-7.647_122_191_181_588_332_884_84_e-13,
1.605_904_306_056_645_016_290_54_e-10,
-2.505_210_837_635_020_458_107_55_e-8,
2.755_731_922_391_987_476_304_16_e-6,
-0.000_198_412_698_412_696_162_806_809,
0.008_333_333_333_333_329_748_238_15,
)
.mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808));
s * (u * d) + d
}
pub fn cos_deterministic<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let r = d;
let g = d.abs().simd_lt(F64x::TRIGRANGEMAX2);
let dql = F64x::splat(2.).mla(
(d.mla(F64x::FRAC_1_PI, F64x::splat(-0.5))).round(),
F64x::ONE,
);
let mut ql = dql.roundi();
d = dql.mla(-F64x::PI_A2 * F64x::HALF, d);
d = dql.mla(-F64x::PI_B2 * F64x::HALF, d);
if !g.all() {
let mut dqh = (r.mla(
F64x::FRAC_1_PI / F64x::D1_23,
-F64x::FRAC_1_PI / F64x::D1_24,
))
.trunc();
let mut ql2 = (r * F64x::FRAC_1_PI + dqh.mla(-F64x::D1_23, F64x::splat(-0.5))).roundi();
dqh *= F64x::D1_24;
ql2 = ql2 + ql2 + Ix::splat(1);
let dql = ql2.cast::<f64>();
let mut u = dqh.mla(-F64x::PI_A * F64x::HALF, r);
u = dql.mla(-F64x::PI_A * F64x::HALF, u);
u = dqh.mla(-F64x::PI_B * F64x::HALF, u);
u = dql.mla(-F64x::PI_B * F64x::HALF, u);
u = dqh.mla(-F64x::PI_C * F64x::HALF, u);
u = dql.mla(-F64x::PI_C * F64x::HALF, u);
u = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, u);
ql = g.cast::<i32>().select(ql, ql2);
d = g.select(d, u);
let g = r.abs().simd_lt(F64x::TRIGRANGEMAX);
if !g.all() {
let (mut ddidd, ddii) = rempi(r);
let mut ql2 = ddii & Ix::splat(3);
ql2 = ql2
+ ql2
+ ddidd
.0
.simd_gt(F64x::ZERO)
.cast::<i32>()
.select(Ix::splat(8), Ix::splat(7));
ql2 >>= Ix::splat(1);
let o = (ddii & Ix::splat(1)).simd_eq(Ix::splat(0));
let y = ddidd
.0
.simd_gt(F64x::ZERO)
.select(F64x::ZERO, F64x::splat(-1.));
let mut x = Doubled::new(
F64x::splat(-crate::f64::D_PI.0 * 0.5).mul_sign(y),
F64x::splat(-crate::f64::D_PI.1 * 0.5).mul_sign(y),
);
x = ddidd + x;
ddidd = o.cast().select_doubled(x, ddidd);
let u = F64x::from(ddidd);
ql = g.cast::<i32>().select(ql, ql2);
d = g.select(d, u);
d = F64x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
}
let s = d * d;
d = F64x::from_bits(
((ql & Ix::splat(2)).simd_eq(Ix::splat(0)).to_int().cast() & F64x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let s2 = s * s;
let s4 = s2 * s2;
let u = F64x::poly8(
s,
s2,
s4,
-7.972_559_550_090_378_688_919_52_e-18,
2.810_099_727_108_632_000_912_51_e-15,
-7.647_122_191_181_588_332_884_84_e-13,
1.605_904_306_056_645_016_290_54_e-10,
-2.505_210_837_635_020_458_107_55_e-8,
2.755_731_922_391_987_476_304_16_e-6,
-0.000_198_412_698_412_696_162_806_809,
0.008_333_333_333_333_329_748_238_15,
)
.mla(s, F64x::splat(-0.166_666_666_666_666_657_414_808));
s * (u * d) + d
}
#[test]
fn test_cos() {
test_f_f::<2>(cos, rug::Float::cos, f64::MIN..=f64::MAX, 3.5);
test_f_f::<2>(cos_deterministic, rug::Float::cos, f64::MIN..=f64::MAX, 3.5);
}
pub fn sincos<const N: usize>(d: F64x<N>) -> (F64x<N>, F64x<N>) {
let mut s;
let ql;
if d.abs().simd_lt(F64x::TRIGRANGEMAX2).all() {
let dql = (d * F64x::FRAC_2_PI).round();
ql = dql.roundi();
s = dql.mla(-F64x::PI_A2 * F64x::HALF, d);
s = dql.mla(-F64x::PI_B2 * F64x::HALF, s);
} else if d.abs().simd_lt(F64x::TRIGRANGEMAX).all() {
let dqh = (d * (F64x::FRAC_2_PI / F64x::D1_24)).trunc();
let dqh = dqh * F64x::D1_24;
let dql = (d * F64x::FRAC_2_PI - dqh).round();
ql = dql.roundi();
s = dqh.mla(-F64x::PI_A * F64x::HALF, d);
s = dql.mla(-F64x::PI_A * F64x::HALF, s);
s = dqh.mla(-F64x::PI_B * F64x::HALF, s);
s = dql.mla(-F64x::PI_B * F64x::HALF, s);
s = dqh.mla(-F64x::PI_C * F64x::HALF, s);
s = dql.mla(-F64x::PI_C * F64x::HALF, s);
s = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, s);
} else {
let (ddidd, ddii) = rempi(d);
ql = ddii;
s = F64x::from(ddidd);
s = F64x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | s.to_bits());
}
let t = s;
s = s * s;
let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10)
.mla(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8))
.mla(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6))
.mla(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914))
.mla(s, F64x::splat(0.008_333_333_333_319_184_596_174_6))
.mla(s, F64x::splat(-0.166_666_666_666_666_130_709_393));
let rx = (u * s).mla(t, t);
let rx = d.is_neg_zero().select(F64x::NEG_ZERO, rx);
let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11)
.mla(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9))
.mla(s, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7))
.mla(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5))
.mla(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29))
.mla(s, F64x::splat(0.041_666_666_666_666_551_959_206_2))
.mla(s, F64x::splat(-0.5));
let ry = s.mla(u, F64x::ONE);
let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(0)).cast::<i64>();
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (ql & Ix::splat(2)).simd_eq(Ix::splat(2)).cast::<i64>();
rsin = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((ql + Ix::splat(1)) & Ix::splat(2))
.simd_eq(Ix::splat(2))
.cast::<i64>();
rcos = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
(rsin, rcos)
}
pub fn sincos_deterministic<const N: usize>(d: F64x<N>) -> (F64x<N>, F64x<N>) {
let mut s = d;
let dql = (s * F64x::FRAC_2_PI).round();
let mut ql = dql.roundi();
s = dql.mla(-F64x::PI_A2 * F64x::HALF, s);
s = dql.mla(-F64x::PI_B2 * F64x::HALF, s);
let g = d.abs().simd_lt(F64x::TRIGRANGEMAX2);
if !g.all() {
let mut dqh = (d * F64x::FRAC_2_PI / F64x::D1_24).trunc();
dqh *= F64x::D1_24;
let dql = (d * F64x::FRAC_2_PI - dqh).round();
let mut u = dqh.mla(-F64x::PI_A * F64x::HALF, d);
u = dql.mla(-F64x::PI_A * F64x::HALF, u);
u = dqh.mla(-F64x::PI_B * F64x::HALF, u);
u = dql.mla(-F64x::PI_B * F64x::HALF, u);
u = dqh.mla(-F64x::PI_C * F64x::HALF, u);
u = dql.mla(-F64x::PI_C * F64x::HALF, u);
u = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, u);
ql = g.cast::<i32>().select(ql, dql.roundi());
s = g.select(s, u);
let g = d.abs().simd_lt(F64x::TRIGRANGEMAX);
if !g.all() {
let (ddidd, ddii) = rempi(d);
let mut u = F64x::from(ddidd);
u = F64x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | u.to_bits());
ql = g.cast::<i32>().select(ql, ddii);
s = g.select(s, u);
}
}
let t = s;
s = s * s;
let u = F64x::splat(1.589_383_072_832_289_373_285_11_e-10)
.mla(s, F64x::splat(-2.505_069_435_025_397_733_493_18_e-8))
.mla(s, F64x::splat(2.755_731_317_768_463_605_125_47_e-6))
.mla(s, F64x::splat(-0.000_198_412_698_278_911_770_864_914))
.mla(s, F64x::splat(0.008_333_333_333_319_184_596_174_6))
.mla(s, F64x::splat(-0.166_666_666_666_666_130_709_393));
let mut rx = (u * s).mla(t, t);
rx = d.is_neg_zero().select(F64x::NEG_ZERO, rx);
let u = F64x::splat(-1.136_153_502_390_974_295_315_23_e-11)
.mla(s, F64x::splat(2.087_574_712_070_400_554_793_66_e-9))
.mla(s, F64x::splat(-2.755_731_440_288_475_674_985_67_e-7))
.mla(s, F64x::splat(2.480_158_728_900_018_673_119_15_e-5))
.mla(s, F64x::splat(-0.001_388_888_888_887_140_192_823_29))
.mla(s, F64x::splat(0.041_666_666_666_666_551_959_206_2))
.mla(s, F64x::splat(-0.5));
let ry = s.mla(u, F64x::ONE);
let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(0)).cast::<i64>();
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (ql & Ix::splat(2)).simd_eq(Ix::splat(2)).cast::<i64>();
rsin = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((ql + Ix::splat(1)) & Ix::splat(2))
.simd_eq(Ix::splat(2))
.cast::<i64>();
rcos = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
(rsin, rcos)
}
#[test]
fn test_sincos() {
test_f_ff::<2>(
sincos,
|in1| {
let prec = in1.prec();
in1.sin_cos(rug::Float::new(prec))
},
f64::MIN..=f64::MAX,
3.5,
);
test_f_ff::<2>(
sincos_deterministic,
|in1| {
let prec = in1.prec();
in1.sin_cos(rug::Float::new(prec))
},
f64::MIN..=f64::MAX,
3.5,
);
}
pub fn tan<const N: usize>(d: F64x<N>) -> F64x<N> {
let ql;
let mut x;
if d.abs().simd_lt(F64x::TRIGRANGEMAX2).all() {
let dql = (d * F64x::FRAC_2_PI).round();
ql = dql.roundi();
x = dql.mla(-F64x::PI_A2 * F64x::HALF, d);
x = dql.mla(-F64x::PI_B2 * F64x::HALF, x);
} else if d.abs().simd_lt(F64x::splat(1e+6)).all() {
let dqh = (d * (F64x::FRAC_2_PI / F64x::D1_24)).trunc();
let dqh = dqh * F64x::D1_24;
let dql = (d * F64x::FRAC_2_PI - dqh).round();
ql = dql.roundi();
x = dqh.mla(-F64x::PI_A * F64x::HALF, d);
x = dql.mla(-F64x::PI_A * F64x::HALF, x);
x = dqh.mla(-F64x::PI_B * F64x::HALF, x);
x = dql.mla(-F64x::PI_B * F64x::HALF, x);
x = dqh.mla(-F64x::PI_C * F64x::HALF, x);
x = dql.mla(-F64x::PI_C * F64x::HALF, x);
x = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, x);
} else {
let (ddidd, ddii) = rempi(d);
ql = ddii;
x = F64x::from(ddidd);
x = F64x::from_bits(d.is_infinite().to_int().cast() | x.to_bits());
x = F64x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | x.to_bits());
}
x *= F64x::HALF;
let s = x * x;
let s2 = s * s;
let s4 = s2 * s2;
let mut u = F64x::poly8(
s,
s2,
s4,
0.324_509_882_663_927_631_6_e-3,
0.561_921_973_811_432_373_5_e-3,
0.146_078_150_240_278_449_4_e-2,
0.359_161_154_079_249_951_9_e-2,
0.886_326_840_956_311_312_6_e-2,
0.218_694_872_818_553_549_8_e-1,
0.539_682_539_951_727_297_e-1,
0.133_333_333_333_050_058_1,
)
.mla(s, F64x::splat(0.333_333_333_333_334_369_5));
u = s.mla(u * x, x);
let y = u.mla(u, -F64x::ONE);
x = u * F64x::splat(-2.);
let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(1)).cast::<i64>();
u = o.select(-y, x) / o.select(x, y);
d.simd_eq(F64x::ZERO).select(d, u)
}
pub fn tan_deterministic<const N: usize>(d: F64x<N>) -> F64x<N> {
let dql = (d * F64x::FRAC_2_PI).round();
let mut ql = dql.roundi();
let mut s = dql.mla(-F64x::PI_A2 * F64x::HALF, d);
s = dql.mla(-F64x::PI_B2 * F64x::HALF, s);
let g = d.abs().simd_lt(F64x::TRIGRANGEMAX2);
if !g.all() {
let mut dqh = (d * F64x::FRAC_2_PI / F64x::D1_24).trunc();
dqh *= F64x::D1_24;
let dql = (d * F64x::FRAC_2_PI - dqh).round();
let mut u = dqh.mla(-F64x::PI_A * F64x::HALF, d);
u = dql.mla(-F64x::PI_A * F64x::HALF, u);
u = dqh.mla(-F64x::PI_B * F64x::HALF, u);
u = dql.mla(-F64x::PI_B * F64x::HALF, u);
u = dqh.mla(-F64x::PI_C * F64x::HALF, u);
u = dql.mla(-F64x::PI_C * F64x::HALF, u);
u = (dqh + dql).mla(-F64x::PI_D * F64x::HALF, u);
ql = g.cast::<i32>().select(ql, dql.roundi());
s = g.select(s, u);
let g = d.abs().simd_lt(F64x::splat(1e+6));
if !g.all() {
let (ddidd, ddii) = rempi(d);
let ql2 = ddii;
let mut u = F64x::from(ddidd);
u = F64x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | u.to_bits());
ql = g.cast::<i32>().select(ql, ql2);
s = g.select(s, u);
}
}
let x = s * F64x::HALF;
let s = x * x;
let s2 = s * s;
let s4 = s2 * s2;
let mut u = F64x::poly8(
s,
s2,
s4,
0.324_509_882_663_927_631_6_e-3,
0.561_921_973_811_432_373_5_e-3,
0.146_078_150_240_278_449_4_e-2,
0.359_161_154_079_249_951_9_e-2,
0.886_326_840_956_311_312_6_e-2,
0.218_694_872_818_553_549_8_e-1,
0.539_682_539_951_727_297_e-1,
0.133_333_333_333_050_058_1,
)
.mla(s, F64x::splat(0.333_333_333_333_334_369_5));
u = s.mla(u * x, x);
let y = u.mla(u, -F64x::ONE);
let x = u * F64x::splat(-2.);
let o = (ql & Ix::splat(1)).simd_eq(Ix::splat(1)).cast::<i64>();
u = o.select(-y, x) / o.select(x, y);
d.simd_eq(F64x::ZERO).select(d, u)
}
#[test]
fn test_tan() {
test_f_f::<2>(tan, rug::Float::tan, f64::MIN..=f64::MAX, 3.5);
test_f_f::<2>(tan_deterministic, rug::Float::tan, f64::MIN..=f64::MAX, 3.5);
}
pub fn sincospi<const N: usize>(d: F64x<N>) -> (F64x<N>, F64x<N>) {
let u = d * F64x::splat(4.);
let mut q = u.trunci();
q = (q + ((q.cast() >> Ux::splat(31)).cast() ^ Ix::splat(1))) & Ix::splat(!1);
let s = u - q.cast();
let t = s;
let s = s * s;
let u = F64x::splat(0.688_063_889_476_606_013_6_e-11)
.mla(s, F64x::splat(-0.175_715_956_454_231_019_9_e-8))
.mla(s, F64x::splat(0.313_361_632_725_786_731_1_e-6))
.mla(s, F64x::splat(-0.365_762_041_638_848_645_2_e-4))
.mla(s, F64x::splat(0.249_039_457_018_993_210_3_e-2))
.mla(s, F64x::splat(-0.807_455_121_882_805_632_e-1))
.mla(s, F64x::splat(0.785_398_163_397_448_279));
let rx = u * t;
let u = F64x::splat(-0.386_014_121_368_379_435_2_e-12)
.mla(s, F64x::splat(0.115_005_788_802_968_141_5_e-9))
.mla(s, F64x::splat(-0.246_113_649_300_666_355_3_e-7))
.mla(s, F64x::splat(0.359_086_044_662_351_671_3_e-5))
.mla(s, F64x::splat(-0.325_991_886_926_943_594_2_e-3))
.mla(s, F64x::splat(0.158_543_442_438_154_116_9_e-1))
.mla(s, F64x::splat(-0.308_425_137_534_042_437_3))
.mla(s, F64x::ONE);
let ry = u;
let o = (q & Ix::splat(2)).simd_eq(Ix::splat(0)).cast::<i64>();
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (q & Ix::splat(4)).simd_eq(Ix::splat(4)).cast::<i64>();
rsin = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((q + Ix::splat(2)) & Ix::splat(4))
.simd_eq(Ix::splat(4))
.cast::<i64>();
rcos = F64x::from_bits((o.to_int().cast() & F64x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
let o = d.abs().simd_gt(F64x::TRIGRANGEMAX3 / F64x::splat(4.));
rsin = F64x::from_bits(!o.to_int().cast::<u64>() & rsin.to_bits());
rcos = F64x::from_bits(!o.to_int().cast::<u64>() & rcos.to_bits());
let o = d.is_infinite();
rsin = F64x::from_bits(o.to_int().cast() | rsin.to_bits());
rcos = F64x::from_bits(o.to_int().cast() | rcos.to_bits());
(rsin, rcos)
}
#[test]
fn test_sincospi() {
use rug::{float::Constant, Float};
let rangemax2 = 1e+9 / 4.;
test_f_ff::<2>(
sincospi,
|mut in1| {
let prec = in1.prec();
in1.set_prec(prec * 2);
(in1 * Float::with_val(prec * 2, Constant::Pi)).sin_cos(Float::new(prec))
},
-rangemax2..=rangemax2,
1.5,
);
}
#[inline]
fn atan2k<const N: usize>(y: F64x<N>, x: F64x<N>) -> F64x<N> {
let q = x.is_sign_negative().cast().to_int() & Ix::splat(-2);
let x = x.abs();
let q = x.simd_lt(y).cast::<i32>().select(q + Ix::splat(1), q);
let p = x.simd_lt(y);
let s = p.select(-x, y);
let mut t = x.simd_max(y);
let s = s / t;
t = s * s;
let t2 = t * t;
let t4 = t2 * t2;
let t8 = t4 * t4;
let t16 = t8 * t8;
let u = F64x::poly19(
t,
t2,
t4,
t8,
t16,
-1.887_960_084_630_734_965_637_46_e-05,
0.000_209_850_076_645_816_976_906_797,
-0.001_106_118_314_866_724_825_634_71,
0.003_700_267_441_887_131_192_324_03,
-0.008_898_961_958_876_554_917_408_09,
0.016_599_329_773_529_201_970_117,
-0.025_451_762_493_231_264_161_686_1,
0.033_785_258_000_135_306_999_389_7,
-0.040_762_919_127_683_650_000_193_4,
0.046_666_715_007_784_062_563_267_5,
-0.052_367_485_230_348_245_761_611_3,
0.058_766_639_292_667_358_085_431_3,
-0.066_657_357_936_108_052_598_456_2,
0.076_921_953_831_176_961_835_502_9,
-0.090_908_995_008_245_008_229_153,
0.111_111_105_648_261_418_443_745,
-0.142_857_142_667_713_293_837_65,
0.199_999_999_996_591_265_594_148,
-0.333_333_333_333_311_110_369_124,
);
t = s.mla(t * u, s);
q.cast::<f64>().mla(F64x::FRAC_PI_2, t)
}
pub fn atan2<const N: usize>(y: F64x<N>, x: F64x<N>) -> F64x<N> {
let mut r = atan2k(y.abs(), x);
r = r.mul_sign(x);
r = (x.is_infinite() | x.simd_eq(F64x::ZERO)).select(
F64x::FRAC_PI_2 - visinf2_vd_vd_vd(x, F64x::FRAC_PI_2.mul_sign(x)),
r,
);
r = y.is_infinite().select(
F64x::FRAC_PI_2 - visinf2_vd_vd_vd(x, F64x::FRAC_PI_4.mul_sign(x)),
r,
);
r = y.simd_eq(F64x::ZERO).select(
F64x::from_bits(x.is_sign_negative().to_int().cast() & F64x::PI.to_bits()),
r,
);
F64x::from_bits((x.is_nan() | y.is_nan()).to_int().cast() | r.mul_sign(y).to_bits())
}
#[test]
fn test_atan2() {
test_ff_f::<2>(
atan2,
rug::Float::atan2,
f64::MIN..=f64::MAX,
f64::MIN..=f64::MAX,
3.5,
);
}
pub fn asin<const N: usize>(d: F64x<N>) -> F64x<N> {
let o = d.abs().simd_lt(F64x::HALF);
let x2 = o.select(d * d, (F64x::ONE - d.abs()) * F64x::HALF);
let x = o.select(d.abs(), x2.sqrt());
let x4 = x2 * x2;
let x8 = x4 * x4;
let x16 = x8 * x8;
let mut u = F64x::poly12(
x2,
x4,
x8,
x16,
0.316_158_765_065_393_462_8_e-1,
-0.158_191_824_332_999_664_3_e-1,
0.192_904_547_726_791_067_4_e-1,
0.660_607_747_627_717_061_e-2,
0.121_536_052_557_737_733_1_e-1,
0.138_871_518_450_160_921_8_e-1,
0.173_595_699_122_361_460_4_e-1,
0.223_717_618_193_204_834_1_e-1,
0.303_819_592_803_813_223_7_e-1,
0.446_428_568_137_710_243_8_e-1,
0.750_000_000_037_858_161_1_e-1,
0.166_666_666_666_649_754_3,
);
u = u.mla(x * x2, x);
let r = o.select(u, u.mla(F64x::splat(-2.), F64x::FRAC_PI_2));
r.mul_sign(d)
}
#[test]
fn test_asin() {
test_f_f::<2>(asin, rug::Float::asin, -1.0..=1.0, 3.5);
}
pub fn acos<const N: usize>(d: F64x<N>) -> F64x<N> {
let o = d.abs().simd_lt(F64x::HALF);
let x2 = o.select(d * d, (F64x::ONE - d.abs()) * F64x::HALF);
let mut x = o.select(d.abs(), x2.sqrt());
x = d.abs().simd_eq(F64x::ONE).select(F64x::ZERO, x);
let x4 = x2 * x2;
let x8 = x4 * x4;
let x16 = x8 * x8;
let mut u = F64x::poly12(
x2,
x4,
x8,
x16,
0.316_158_765_065_393_462_8_e-1,
-0.158_191_824_332_999_664_3_e-1,
0.192_904_547_726_791_067_4_e-1,
0.660_607_747_627_717_061_e-2,
0.121_536_052_557_737_733_1_e-1,
0.138_871_518_450_160_921_8_e-1,
0.173_595_699_122_361_460_4_e-1,
0.223_717_618_193_204_834_1_e-1,
0.303_819_592_803_813_223_7_e-1,
0.446_428_568_137_710_243_8_e-1,
0.750_000_000_037_858_161_1_e-1,
0.166_666_666_666_649_754_3,
);
u *= x2 * x;
let y = F64x::FRAC_PI_2 - (x.mul_sign(d) + u.mul_sign(d));
x += u;
let r = o.select(y, x * F64x::splat(2.));
(!o & d.simd_lt(F64x::ZERO)).select(
Doubled::<F64x<N>>::splat(crate::f64::D_PI)
.add_checked(-r)
.0,
r,
)
}
#[test]
fn test_acos() {
test_f_f::<2>(acos, rug::Float::acos, -1.0..=1.0, 3.5);
}
pub fn atan<const N: usize>(mut s: F64x<N>) -> F64x<N> {
let q = s.is_sign_negative().cast().to_int() & Ix::splat(2);
s = s.abs();
let q = F64x::ONE
.simd_lt(s)
.cast::<i32>()
.select(q + Ix::splat(1), q);
s = F64x::ONE.simd_lt(s).select(s.recip(), s);
let mut t = s * s;
let t2 = t * t;
let t4 = t2 * t2;
let t8 = t4 * t4;
let t16 = t8 * t8;
let u = F64x::poly19(
t,
t2,
t4,
t8,
t16,
-1.887_960_084_630_734_965_637_46_e-5,
0.000_209_850_076_645_816_976_906_797,
-0.001_106_118_314_866_724_825_634_71,
0.003_700_267_441_887_131_192_324_03,
-0.008_898_961_958_876_554_917_408_09,
0.016_599_329_773_529_201_970_117,
-0.025_451_762_493_231_264_161_686_1,
0.033_785_258_000_135_306_999_389_7,
-0.040_762_919_127_683_650_000_193_4,
0.046_666_715_007_784_062_563_267_5,
-0.052_367_485_230_348_245_761_611_3,
0.058_766_639_292_667_358_085_431_3,
-0.066_657_357_936_108_052_598_456_2,
0.076_921_953_831_176_961_835_502_9,
-0.090_908_995_008_245_008_229_153,
0.111_111_105_648_261_418_443_745,
-0.142_857_142_667_713_293_837_65,
0.199_999_999_996_591_265_594_148,
-0.333_333_333_333_311_110_369_124,
);
t = s.mla(t * u, s);
t = (q & Ix::splat(1))
.simd_eq(Ix::splat(1))
.cast::<i64>()
.select(F64x::FRAC_PI_2 - t, t);
t = F64x::from_bits(
((q & Ix::splat(2)).simd_eq(Ix::splat(2)).to_int().cast() & F64x::NEG_ZERO.to_bits())
^ t.to_bits(),
);
t
}
#[test]
fn test_atan() {
test_f_f::<2>(atan, rug::Float::atan, f64::MIN..=f64::MAX, 3.5);
}
#[inline]
fn expm1k<const N: usize>(d: F64x<N>) -> F64x<N> {
let mut u = (d * F64x::R_LN2).round();
let q = u.roundi();
let s = u.mla(-F64x::L2_U, d);
let s = u.mla(-F64x::L2_L, s);
let s2 = s * s;
let s4 = s2 * s2;
let s8 = s4 * s4;
u = F64x::poly10(
s,
s2,
s4,
s8,
2.088_606_211_072_836_875_363_41_e-9,
2.511_129_308_928_765_186_106_61_e-8,
2.755_739_112_349_004_718_933_38_e-7,
2.755_723_629_119_288_276_294_23_e-6,
2.480_158_715_923_547_299_879_1_e-5,
0.000_198_412_698_960_509_205_564_975,
0.001_388_888_888_897_744_922_079_62,
0.008_333_333_333_316_527_216_649_84,
0.041_666_666_666_666_504_759_142_2,
0.166_666_666_666_666_851_703_837,
);
u = s2.mla(F64x::HALF, s2 * s * u) + s;
q.simd_eq(Ix::splat(0))
.cast::<i64>()
.select(u, ldexp2k(u + F64x::ONE, q) - F64x::ONE)
}
pub fn sinh<const N: usize>(x: F64x<N>) -> F64x<N> {
let e = expm1k(x.abs());
let mut y = (e + F64x::splat(2.)) / (e + F64x::ONE);
y *= F64x::HALF * e;
y = (x.abs().simd_gt(F64x::splat(709.)) | y.is_nan()).select(F64x::INFINITY, y);
y = y.mul_sign(x);
F64x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_sinh() {
test_f_f::<2>(sinh, rug::Float::sinh, -709.0..=709.0, 3.5);
}
pub fn cosh<const N: usize>(x: F64x<N>) -> F64x<N> {
let e = u10::exp(x.abs());
let mut y = F64x::HALF.mla(e, F64x::HALF / e);
y = (x.abs().simd_gt(F64x::splat(709.)) | y.is_nan()).select(F64x::INFINITY, y);
F64x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_cosh() {
test_f_f::<2>(cosh, rug::Float::cosh, -709.0..=709.0, 3.5);
}
pub fn tanh<const N: usize>(x: F64x<N>) -> F64x<N> {
let d = expm1k(F64x::splat(2.) * x.abs());
let mut y = d / (F64x::splat(2.) + d);
y = (x.abs().simd_gt(F64x::splat(18.714_973_875)) | y.is_nan()).select(F64x::ONE, y);
y = y.mul_sign(x);
F64x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_tanh() {
test_f_f::<2>(tanh, rug::Float::tanh, -19.0..=19.0, 3.5);
}
pub fn log<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let m;
let ef =
{
let o = d.simd_lt(F64x::splat(f64::MIN_POSITIVE));
d = o.select(d * (F64x::D1_32 * F64x::D1_32), d);
let mut e = ilogb2k(d * F64x::splat(1. / 0.75));
m = ldexp3k(d, -e);
e = o.cast::<i32>().select(e - Ix::splat(64), e);
e.cast()
};
let mut x = (m - F64x::ONE) / (F64x::ONE + m);
let x2 = x * x;
let x4 = x2 * x2;
let x8 = x4 * x4;
let x3 = x * x2;
let t = F64x::poly7(
x2,
x4,
x8,
0.153_487_338_491_425_068_243_146,
0.152_519_917_006_351_951_593_857,
0.181_863_266_251_982_985_677_316,
0.222_221_366_518_767_365_905_163,
0.285_714_294_746_548_025_383_248,
0.399_999_999_950_799_600_689_777,
0.666_666_666_666_777_874_006_3,
);
x = x.mla(
F64x::splat(2.),
F64x::splat(0.693_147_180_559_945_286_226_764) * ef,
);
x = x3.mla(t, x);
x = d.simd_eq(F64x::INFINITY).select(F64x::INFINITY, x);
x = (d.simd_lt(F64x::ZERO) | d.is_nan()).select(F64x::NAN, x);
d.simd_eq(F64x::ZERO).select(F64x::NEG_INFINITY, x)
}
#[test]
fn test_log() {
test_f_f::<2>(log, rug::Float::ln, 0.0..=f64::MAX, 3.5);
}
pub fn log2<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let (m, e) = {
let o = d.simd_lt(F64x::splat(f64::MIN_POSITIVE));
d = o.select(d * (F64x::D1_32 * F64x::D1_32), d);
let e = ilogb2k(d * F64x::splat(1./0.75));
(ldexp3k(d, -e), o.cast::<i32>().select(e - Ix::splat(64), e))
};
let x = (m - F64x::ONE) / (m + F64x::ONE);
let x2 = x * x;
let t = F64x::splat(0.221_194_175_045_608_149)
.mla(x2, F64x::splat(0.220_076_869_315_227_768_9))
.mla(x2, F64x::splat(0.262_370_805_748_851_465_6))
.mla(x2, F64x::splat(0.320_597_747_794_449_550_2))
.mla(x2, F64x::splat(0.412_198_594_548_532_470_9))
.mla(x2, F64x::splat(0.577_078_016_299_705_898_2))
.mla(x2, F64x::splat(0.961_796_693_926_080_914_49));
let s = {
e.cast::<f64>().add_checked(x.mul_as_doubled(F64x::splat(2.885_390_081_777_926_774)))
};
let mut r = t.mla(x * x2, F64x::from(s));
r = d.simd_eq(F64x::INFINITY).select(F64x::INFINITY, r);
r = (d.simd_lt(F64x::ZERO) | d.is_nan()).select(F64x::NAN, r);
r = d.simd_eq(F64x::ZERO).select(F64x::NEG_INFINITY, r);
r
}
#[test]
fn test_log2() {
test_f_f::<2>(log2, rug::Float::log2, 0.0..=f64::MAX, 3.5);
}
pub fn exp10<const N: usize>(d: F64x<N>) -> F64x<N> {
let mut u = (d * F64x::LOG10_2).round();
let q = u.roundi();
let mut s = u.mla(-F64x::L10_U, d);
s = u.mla(-F64x::L10_L, s);
let s2 = s * s;
let s4 = s2 * s2;
let s8 = s4 * s4;
u = F64x::poly11(
s,
s2,
s4,
s8,
0.241_146_349_833_426_765_2_e-3,
0.115_748_841_521_718_737_5_e-2,
0.501_397_554_678_973_365_9_e-2,
0.195_976_232_072_053_308_e-1,
0.680_893_639_944_678_413_8_e-1,
0.206_995_849_472_267_623_4,
0.539_382_929_205_853_622_9,
0.117_125_514_890_854_165_5_e+1,
0.203_467_859_229_343_295_3_e+1,
0.265_094_905_523_920_587_6_e+1,
0.230_258_509_299_404_590_1_e+1,
);
u = u.mla(s, F64x::ONE);
u = ldexp2k(u, q);
u = d
.simd_gt(F64x::splat(308.254_715_559_916_71))
.select(F64x::INFINITY, u);
F64x::from_bits(!d.simd_lt(F64x::splat(-350.)).to_int().cast::<u64>() & u.to_bits())
}
#[test]
fn test_exp10() {
test_f_f::<2>(exp10, rug::Float::exp10, -350.0..=308.26, 3.5);
}
pub fn exp2<const N: usize>(d: F64x<N>) -> F64x<N> {
let mut u = d.round();
let q = u.roundi();
let s = d - u;
let s2 = s * s;
let s4 = s2 * s2;
let s8 = s4 * s4;
u = F64x::poly10(
s,
s2,
s4,
s8,
0.443_435_908_292_652_945_4_e-9,
0.707_316_459_808_570_742_5_e-8,
0.101_781_926_092_176_045_1_e-6,
0.132_154_387_251_132_761_5_e-5,
0.152_527_335_351_758_473_e-4,
0.154_035_304_510_114_780_8_e-3,
0.133_335_581_467_049_907_3_e-2,
0.961_812_910_759_760_053_6_e-2,
0.555_041_086_648_204_659_6_e-1,
0.240_226_506_959_101_221_4,
);
u = u.mla(s, F64x::splat(0.693_147_180_559_945_286_2));
u = u.mla(s, F64x::ONE);
u = ldexp2k(u, q);
u = d.simd_ge(F64x::splat(1024.)).select(F64x::INFINITY, u);
F64x::from_bits(!d.simd_lt(F64x::splat(-2000.)).to_int().cast::<u64>() & u.to_bits())
}
#[test]
fn test_exp2() {
test_f_f::<2>(exp2, rug::Float::exp2, -2000.0..=1024.0, 3.5);
}
pub fn sqrt<const N: usize>(d: F64x<N>) -> F64x<N> {
u05::sqrt(d)
}
pub fn cbrt<const N: usize>(mut d: F64x<N>) -> F64x<N> {
let mut q = F64x::ONE;
let e = ilogbk(d.abs()) + Ix::splat(1);
d = ldexp2k(d, -e);
let t = e.cast::<f64>() + F64x::splat(6144.);
let qu = (t * F64x::splat(1. / 3.)).trunci();
let re = (t - (qu.cast::<f64>() * F64x::splat(3.))).trunci();
q = re
.simd_eq(Ix::splat(1))
.cast::<i64>()
.select(F64x::splat(1.259_921_049_894_873_164_767_210_6), q);
q = re
.simd_eq(Ix::splat(2))
.cast::<i64>()
.select(F64x::splat(1.587_401_051_968_199_474_751_705_6), q);
q = ldexp2k(q, qu - Ix::splat(2048));
q = q.mul_sign(d);
d = d.abs();
let mut x = F64x::splat(-0.640_245_898_480_692_909_870_982)
.mla(d, F64x::splat(2.961_551_030_200_395_118_185_95))
.mla(d, F64x::splat(-5.733_530_609_229_478_436_361_66))
.mla(d, F64x::splat(6.039_903_689_894_587_479_614_07))
.mla(d, F64x::splat(-3.858_419_355_104_449_888_216_32))
.mla(d, F64x::splat(2.230_727_530_249_660_972_572_2));
let mut y = x * x;
y = y * y;
x -= d.mul_sub(y, x) * F64x::splat(1. / 3.);
y = d * x * x;
y = (y - F64x::splat(2. / 3.) * y * y.mla(x, F64x::splat(-1.))) * q;
y
}
#[test]
fn test_cbrt() {
test_f_f::<2>(cbrt, rug::Float::cbrt, f64::MIN..=f64::MAX, 3.5);
}
pub fn hypot<const N: usize>(x: F64x<N>, y: F64x<N>) -> F64x<N> {
let x = x.abs();
let y = y.abs();
let min = x.simd_min(y);
let max = x.simd_max(y);
let t = min / max;
let mut ret = max * t.mla(t, F64x::ONE).sqrt();
ret = min.simd_eq(F64x::ZERO).select(max, ret);
ret = (x.is_nan() | y.is_nan()).select(F64x::NAN, ret);
(x.simd_eq(F64x::INFINITY) | y.simd_eq(F64x::INFINITY)).select(F64x::INFINITY, ret)
}
#[test]
fn test_hypot() {
test_ff_f::<2>(
hypot,
rug::Float::hypot,
-1e307..=1e307,
-1e307..=1e307,
3.5,
);
}