use super::*;
pub fn sinf<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let mut q;
let u: F32x<N>;
let r = d;
if d.abs().simd_lt(F32x::TRIGRANGEMAX2).all() {
q = (d * F32x::FRAC_1_PI).roundi();
u = q.cast();
d = u.mla(-F32x::PI_A2, d);
d = u.mla(-F32x::PI_B2, d);
d = u.mla(-F32x::PI_C2, d);
} else if d.abs().simd_lt(F32x::TRIGRANGEMAX).all() {
q = (d * F32x::FRAC_1_PI).roundi();
u = q.cast();
d = u.mla(-F32x::PI_A, d);
d = u.mla(-F32x::PI_B, d);
d = u.mla(-F32x::PI_C, d);
d = u.mla(-F32x::PI_D, d);
} else {
let (mut dfidf, dfii) = rempif(d);
q = dfii & I32x::splat(3);
q = q
+ q
+ dfidf
.0
.simd_gt(F32x::ZERO)
.select(I32x::splat(2), I32x::splat(1));
q >>= I32x::splat(2);
let o = (dfii & I32x::splat(1)).simd_eq(I32x::splat(1));
let mut x = Doubled::new(
F32x::splat(crate::f32::D_PI.0 * -0.5).mul_sign(dfidf.0),
F32x::splat(crate::f32::D_PI.1 * -0.5).mul_sign(dfidf.0),
);
x = dfidf + x;
dfidf = o.select_doubled(x, dfidf);
d = F32x::from(dfidf);
d = F32x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
let s = d * d;
d = F32x::from_bits(
((q & I32x::splat(1)).simd_eq(I32x::splat(1)).to_int().cast() & F32x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6)
.mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8))
.mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36))
.mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938));
u = s * (u * d) + d;
r.is_neg_zero().select(r, u)
}
pub fn sinf_deterministic<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let r = d;
let mut q = (d * F32x::FRAC_1_PI).roundi();
let u = q.cast::<f32>();
d = u.mla(-F32x::PI_A2, d);
d = u.mla(-F32x::PI_B2, d);
d = u.mla(-F32x::PI_C2, d);
let g = r.abs().simd_lt(F32x::TRIGRANGEMAX2);
if !g.all() {
let s = q.cast::<f32>();
let mut u = s.mla(-F32x::PI_A, r);
u = s.mla(-F32x::PI_B, u);
u = s.mla(-F32x::PI_C, u);
u = s.mla(-F32x::PI_D, u);
d = g.select(d, u);
let g = r.abs().simd_lt(F32x::TRIGRANGEMAX);
if !g.all() {
let (mut dfidf, dfii) = rempif(d);
let mut q2 = dfii & I32x::splat(3);
q2 = q2
+ q2
+ dfidf
.0
.simd_gt(F32x::ZERO)
.select(I32x::splat(2), I32x::splat(1));
q2 >>= I32x::splat(2);
let o = (dfii & I32x::splat(1)).simd_eq(I32x::splat(1));
let mut x = Doubled::new(
F32x::splat(crate::f32::D_PI.0 * -0.5).mul_sign(dfidf.0),
F32x::splat(crate::f32::D_PI.1 * -0.5).mul_sign(dfidf.0),
);
x = dfidf + x;
dfidf = o.select_doubled(x, dfidf);
u = F32x::from(dfidf);
u = F32x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | u.to_bits());
q = g.select(q, q2);
d = g.select(d, u);
}
}
let s = d * d;
d = F32x::from_bits(
((q & I32x::splat(1)).simd_eq(I32x::splat(1)).to_int().cast() & F32x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let mut u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6)
.mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8))
.mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36))
.mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938));
u = s * (u * d) + d;
r.is_neg_zero().select(r, u)
}
#[test]
fn test_sinf() {
test_f_f::<2>(sinf, rug::Float::sin, f32::MIN..=f32::MAX, 3.5);
test_f_f::<2>(
sinf_deterministic,
rug::Float::sin,
f32::MIN..=f32::MAX,
3.5,
);
}
pub fn cosf<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let mut q;
let r = d;
if d.abs().simd_lt(F32x::TRIGRANGEMAX2).all() {
q = (d * F32x::FRAC_1_PI - F32x::HALF).roundi();
q = q + q + I32x::splat(1);
let u = q.cast::<f32>();
d = u.mla(-F32x::PI_A2 * F32x::HALF, d);
d = u.mla(-F32x::PI_B2 * F32x::HALF, d);
d = u.mla(-F32x::PI_C2 * F32x::HALF, d);
} else if d.abs().simd_lt(F32x::TRIGRANGEMAX).all() {
q = (d * F32x::FRAC_1_PI - F32x::HALF).roundi();
q = q + q + I32x::splat(1);
let u = q.cast::<f32>();
d = u.mla(-F32x::PI_A * F32x::HALF, d);
d = u.mla(-F32x::PI_B * F32x::HALF, d);
d = u.mla(-F32x::PI_C * F32x::HALF, d);
d = u.mla(-F32x::PI_D * F32x::HALF, d);
} else {
let (mut dfidf, dfii) = rempif(d);
q = dfii & I32x::splat(3);
q = q
+ q
+ dfidf
.0
.simd_gt(F32x::ZERO)
.select(I32x::splat(8), I32x::splat(7));
q >>= I32x::splat(1);
let o = (dfii & I32x::splat(1)).simd_eq(I32x::splat(0));
let y = dfidf
.0
.simd_gt(F32x::ZERO)
.select(F32x::ZERO, F32x::splat(-1.));
let mut x = Doubled::new(
F32x::splat(crate::f32::D_PI.0 * -0.5).mul_sign(y),
F32x::splat(crate::f32::D_PI.1 * -0.5).mul_sign(y),
);
x = dfidf + x;
dfidf = o.select_doubled(x, dfidf);
d = F32x::from(dfidf);
d = F32x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | d.to_bits());
}
let s = d * d;
d = F32x::from_bits(
((q & I32x::splat(2)).simd_eq(I32x::splat(0)).to_int().cast() & F32x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6)
.mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8))
.mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36))
.mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938));
s * (u * d) + d
}
pub fn cosf_deterministic<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let r = d;
let mut q = (d * F32x::FRAC_1_PI - F32x::HALF).roundi();
q = q + q + I32x::splat(1);
let u = q.cast::<f32>();
d = u.mla(-F32x::PI_A2 * F32x::HALF, d);
d = u.mla(-F32x::PI_B2 * F32x::HALF, d);
d = u.mla(-F32x::PI_C2 * F32x::HALF, d);
let g = r.abs().simd_lt(F32x::TRIGRANGEMAX2);
if !g.all() {
let s = q.cast::<f32>();
let mut u = s.mla(-F32x::PI_A * F32x::HALF, r);
u = s.mla(-F32x::PI_B * F32x::HALF, u);
u = s.mla(-F32x::PI_C * F32x::HALF, u);
u = s.mla(-F32x::PI_D * F32x::HALF, u);
d = g.select(d, u);
let g = r.abs().simd_lt(F32x::TRIGRANGEMAX);
if !g.all() {
let (mut dfidf, dfii) = rempif(d);
let mut q2 = dfii & I32x::splat(3);
q2 = q2
+ q2
+ dfidf
.0
.simd_gt(F32x::ZERO)
.select(I32x::splat(8), I32x::splat(7));
q2 >>= I32x::splat(1);
let o = (dfii & I32x::splat(1)).simd_eq(I32x::splat(0));
let y = dfidf
.0
.simd_gt(F32x::ZERO)
.select(F32x::ZERO, F32x::splat(-1.));
let mut x = Doubled::new(
F32x::splat(crate::f32::D_PI.0 * -0.5).mul_sign(y),
F32x::splat(crate::f32::D_PI.1 * -0.5).mul_sign(y),
);
x = dfidf + x;
dfidf = o.select_doubled(x, dfidf);
u = F32x::from(dfidf);
u = F32x::from_bits((r.is_infinite() | r.is_nan()).to_int().cast() | u.to_bits());
q = g.select(q, q2);
d = g.select(d, u);
}
}
let s = d * d;
d = F32x::from_bits(
((q & I32x::splat(2)).simd_eq(I32x::splat(0)).to_int().cast() & F32x::NEG_ZERO.to_bits())
^ d.to_bits(),
);
let u = F32x::splat(2.608_315_980_978_659_354_150_3_e-6)
.mla(s, F32x::splat(-0.000_198_106_907_191_686_332_225_8))
.mla(s, F32x::splat(0.008_333_078_585_565_090_179_443_36))
.mla(s, F32x::splat(-0.166_666_597_127_914_428_710_938));
s * (u * d) + d
}
#[test]
fn test_cosf() {
test_f_f::<2>(cosf, rug::Float::cos, f32::MIN..=f32::MAX, 3.5);
test_f_f::<2>(
cosf_deterministic,
rug::Float::cos,
f32::MIN..=f32::MAX,
3.5,
);
}
pub fn sincosf<const N: usize>(d: F32x<N>) -> (F32x<N>, F32x<N>) {
let q;
let mut s = d;
if d.abs().simd_lt(F32x::TRIGRANGEMAX2).all() {
q = (d * F32x::FRAC_2_PI).roundi();
let u = q.cast::<f32>();
s = u.mla(-F32x::PI_A2 * F32x::HALF, s);
s = u.mla(-F32x::PI_B2 * F32x::HALF, s);
s = u.mla(-F32x::PI_C2 * F32x::HALF, s);
} else if d.abs().simd_lt(F32x::TRIGRANGEMAX).all() {
q = (d * F32x::FRAC_2_PI).roundi();
let u = q.cast::<f32>();
s = u.mla(-F32x::PI_A * F32x::HALF, s);
s = u.mla(-F32x::PI_B * F32x::HALF, s);
s = u.mla(-F32x::PI_C * F32x::HALF, s);
s = u.mla(-F32x::PI_D * F32x::HALF, s);
} else {
let (dfidf, dfii) = rempif(d);
q = dfii;
s = F32x::from(dfidf);
s = F32x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | s.to_bits());
}
let t = s;
s = s * s;
let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889)
.mla(s, F32x::splat(0.008_332_157_507_538_795_471_191_41))
.mla(s, F32x::splat(-0.166_666_537_523_269_653_320_312));
let rx = (u * s).mla(t, t);
let rx = d.is_neg_zero().select(F32x::NEG_ZERO, rx);
let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7)
.mla(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5))
.mla(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17))
.mla(s, F32x::splat(0.041_666_664_183_139_801_025_390_6))
.mla(s, F32x::splat(-0.5));
let ry = s.mla(u, F32x::ONE);
let o = (q & I32x::splat(1)).simd_eq(I32x::splat(0));
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (q & I32x::splat(2)).simd_eq(I32x::splat(2));
rsin = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((q + I32x::splat(1)) & I32x::splat(2)).simd_eq(I32x::splat(2));
rcos = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
(rsin, rcos)
}
pub fn sincosf_deterministic<const N: usize>(d: F32x<N>) -> (F32x<N>, F32x<N>) {
let mut q = (d * F32x::FRAC_2_PI).roundi();
let u = q.cast::<f32>();
let mut s = u.mla(-F32x::PI_A2 * F32x::HALF, d);
s = u.mla(-F32x::PI_B2 * F32x::HALF, s);
s = u.mla(-F32x::PI_C2 * F32x::HALF, s);
let g = d.abs().simd_lt(F32x::TRIGRANGEMAX2);
if !g.all() {
let q2 = (d * F32x::FRAC_2_PI).roundi();
let u = q2.cast::<f32>();
let mut t = u.mla(-F32x::PI_A * F32x::HALF, d);
t = u.mla(-F32x::PI_B * F32x::HALF, t);
t = u.mla(-F32x::PI_C * F32x::HALF, t);
t = u.mla(-F32x::PI_D * F32x::HALF, t);
q = g.select(q, q2);
s = g.select(s, t);
let g = d.abs().simd_lt(F32x::TRIGRANGEMAX);
if !g.all() {
let (dfidf, dfii) = rempif(d);
let mut t = F32x::from(dfidf);
t = F32x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | t.to_bits());
q = g.select(q, dfii);
s = g.select(s, t);
}
}
let t = s;
s = s * s;
let u = F32x::splat(-0.000_195_169_282_960_705_459_117_889)
.mla(s, F32x::splat(0.008_332_157_507_538_795_471_191_41))
.mla(s, F32x::splat(-0.166_666_537_523_269_653_320_312));
let mut rx = (u * s).mla(t, t);
rx = d.is_neg_zero().select(F32x::NEG_ZERO, rx);
let u = F32x::splat(-2.718_118_423_672_422_068_193_55_e-7)
.mla(s, F32x::splat(2.479_904_469_510_074_704_885_48_e-5))
.mla(s, F32x::splat(-0.001_388_887_874_782_085_418_701_17))
.mla(s, F32x::splat(0.041_666_664_183_139_801_025_390_6))
.mla(s, F32x::splat(-0.5));
let ry = s.mla(u, F32x::ONE);
let o = (q & I32x::splat(1)).simd_eq(I32x::splat(0));
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (q & I32x::splat(2)).simd_eq(I32x::splat(2));
rsin = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((q + I32x::splat(1)) & I32x::splat(2)).simd_eq(I32x::splat(2));
rcos = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
(rsin, rcos)
}
#[test]
fn test_sincosf() {
test_f_ff::<2>(
sincosf,
|in1| {
let prec = in1.prec();
in1.sin_cos(rug::Float::new(prec))
},
f32::MIN..=f32::MAX,
3.5,
);
test_f_ff::<2>(
sincosf_deterministic,
|in1| {
let prec = in1.prec();
in1.sin_cos(rug::Float::new(prec))
},
f32::MIN..=f32::MAX,
3.5,
);
}
pub fn tanf<const N: usize>(d: F32x<N>) -> F32x<N> {
let q;
let mut x = d;
if d.abs().simd_lt(F32x::TRIGRANGEMAX2 * F32x::HALF).all() {
q = (d * F32x::FRAC_2_PI).roundi();
let u = q.cast::<f32>();
x = u.mla(-F32x::PI_A2 * F32x::HALF, x);
x = u.mla(-F32x::PI_B2 * F32x::HALF, x);
x = u.mla(-F32x::PI_C2 * F32x::HALF, x);
} else if d.abs().simd_lt(F32x::TRIGRANGEMAX).all() {
q = (d * (F32x::splat(2.) * F32x::FRAC_1_PI)).roundi();
let u = q.cast::<f32>();
x = u.mla(-F32x::PI_A * F32x::HALF, x);
x = u.mla(-F32x::PI_B * F32x::HALF, x);
x = u.mla(-F32x::PI_C * F32x::HALF, x);
x = u.mla(-F32x::PI_D * F32x::HALF, x);
} else {
let (dfidf, dfii) = rempif(d);
q = dfii;
x = F32x::from(dfidf);
x = F32x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | x.to_bits());
x = d.is_neg_zero().select(d, x);
}
let s = x * x;
let o = (q & I32x::splat(1)).simd_eq(I32x::splat(1));
x = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ x.to_bits());
let mut u = if cfg!(feature = "enable_neon32") {
F32x::splat(0.009_272_458_031_773_567_199_707_03)
.mla(s, F32x::splat(0.003_319_849_958_643_317_222_595_21))
.mla(s, F32x::splat(0.024_299_807_846_546_173_095_703_1))
.mla(s, F32x::splat(0.053_449_530_154_466_629_028_320_3))
.mla(s, F32x::splat(0.133_383_005_857_467_651_367_188))
.mla(s, F32x::splat(0.333_331_853_151_321_411_132_812))
} else {
let s2 = s * s;
let s4 = s2 * s2;
F32x::poly6(
s,
s2,
s4,
0.009_272_458_031_773_567_199_707_03,
0.003_319_849_958_643_317_222_595_21,
0.024_299_807_846_546_173_095_703_1,
0.053_449_530_154_466_629_028_320_3,
0.133_383_005_857_467_651_367_188,
0.333_331_853_151_321_411_132_812,
)
};
u = s.mla(u * x, x);
o.select(u.recip(), u)
}
pub fn tanf_deterministic<const N: usize>(d: F32x<N>) -> F32x<N> {
let mut q = (d * F32x::FRAC_2_PI).roundi();
let u = q.cast::<f32>();
let mut x = u.mla(-F32x::PI_A2 * F32x::HALF, d);
x = u.mla(-F32x::PI_B2 * F32x::HALF, x);
x = u.mla(-F32x::PI_C2 * F32x::HALF, x);
let g = d.abs().simd_lt(F32x::TRIGRANGEMAX2 * F32x::HALF);
if !g.all() {
let q2 = (d * F32x::FRAC_2_PI).roundi();
let s = q.cast::<f32>();
let mut u = s.mla(-F32x::PI_A * F32x::HALF, d);
u = s.mla(-F32x::PI_B * F32x::HALF, u);
u = s.mla(-F32x::PI_C * F32x::HALF, u);
u = s.mla(-F32x::PI_D * F32x::HALF, u);
q = g.select(q, q2);
x = g.select(x, u);
let g = d.abs().simd_lt(F32x::TRIGRANGEMAX);
if !g.all() {
let (dfidf, dfii) = rempif(d);
u = F32x::from(dfidf);
u = F32x::from_bits((d.is_infinite() | d.is_nan()).to_int().cast() | u.to_bits());
u = d.is_neg_zero().select(d, u);
q = g.select(q, dfii);
x = g.select(x, u);
}
}
let s = x * x;
let o = (q & I32x::splat(1)).simd_eq(I32x::splat(1));
x = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ x.to_bits());
let mut u = if cfg!(feature = "enable_neon32") {
F32x::splat(0.009_272_458_031_773_567_199_707_03)
.mla(s, F32x::splat(0.003_319_849_958_643_317_222_595_21))
.mla(s, F32x::splat(0.024_299_807_846_546_173_095_703_1))
.mla(s, F32x::splat(0.053_449_530_154_466_629_028_320_3))
.mla(s, F32x::splat(0.133_383_005_857_467_651_367_188))
.mla(s, F32x::splat(0.333_331_853_151_321_411_132_812))
} else {
let s2 = s * s;
let s4 = s2 * s2;
F32x::poly6(
s,
s2,
s4,
0.009_272_458_031_773_567_199_707_03,
0.003_319_849_958_643_317_222_595_21,
0.024_299_807_846_546_173_095_703_1,
0.053_449_530_154_466_629_028_320_3,
0.133_383_005_857_467_651_367_188,
0.333_331_853_151_321_411_132_812,
)
};
u = s.mla(u * x, x);
o.select(u.recip(), u)
}
#[test]
fn test_tanf() {
test_f_f::<2>(tanf, rug::Float::tan, f32::MIN..=f32::MAX, 3.5);
test_f_f::<2>(
tanf_deterministic,
rug::Float::tan,
f32::MIN..=f32::MAX,
3.5,
);
}
pub fn sincospif<const N: usize>(d: F32x<N>) -> (F32x<N>, F32x<N>) {
let u = d * F32x::splat(4.);
let q = u.trunci();
let q = (q + ((q.cast() >> U32x::splat(31)).cast() ^ I32x::splat(1))) & I32x::splat(!1);
let s = u - q.cast();
let t = s;
let s = s * s;
let u = F32x::splat(-0.360_092_526_5_e-4)
.mla(s, F32x::splat(0.249_008_811_1_e-2))
.mla(s, F32x::splat(-0.807_455_107_6_e-1))
.mla(s, F32x::splat(0.785_398_185_3));
let rx = u * t;
let u = F32x::splat(0.353_981_522_5_e-5)
.mla(s, F32x::splat(-0.325_957_400_5_e-3))
.mla(s, F32x::splat(0.158_543_158_3_e-1))
.mla(s, F32x::splat(-0.308_425_128_5))
.mla(s, F32x::ONE);
let ry = u;
let o = (q & I32x::splat(2)).simd_eq(I32x::splat(0));
let mut rsin = o.select(rx, ry);
let mut rcos = o.select(ry, rx);
let o = (q & I32x::splat(4)).simd_eq(I32x::splat(4));
rsin = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rsin.to_bits());
let o = ((q + I32x::splat(2)) & I32x::splat(4)).simd_eq(I32x::splat(4));
rcos = F32x::from_bits((o.to_int().cast() & F32x::NEG_ZERO.to_bits()) ^ rcos.to_bits());
let o = d.abs().simd_gt(F32x::splat(1e+7));
rsin = F32x::from_bits(!o.to_int().cast::<u32>() & rsin.to_bits());
rcos = F32x::from_bits(!o.to_int().cast::<u32>() & rcos.to_bits());
let o = d.is_infinite();
rsin = F32x::from_bits(o.to_int().cast() | rsin.to_bits());
rcos = F32x::from_bits(o.to_int().cast() | rcos.to_bits());
(rsin, rcos)
}
#[test]
fn test_sincospif() {
use rug::{float::Constant, Float};
let rangemax2 = 1e+7 / 4.;
test_f_ff::<2>(
sincospif,
|mut in1| {
let prec = in1.prec();
in1.set_prec(prec * 2);
(in1 * Float::with_val(prec * 2, Constant::Pi)).sin_cos(Float::new(prec))
},
-rangemax2..=rangemax2,
2.,
);
}
#[inline]
fn atan2kf<const N: usize>(y: F32x<N>, x: F32x<N>) -> F32x<N> {
let q = x.is_sign_negative().to_int() & I32x::splat(-2);
let x = x.abs();
let q = x.simd_lt(y).select(q + I32x::splat(1), q);
let p = x.simd_lt(y);
let s = p.select(-x, y);
let mut t = x.simd_max(y);
let s = s / t;
t = s * s;
let t2 = t * t;
let t4 = t2 * t2;
let u = F32x::poly8(
t,
t2,
t4,
0.002_823_638_962_581_753_730_773_93,
-0.015_956_902_876_496_315_002_441_4,
0.042_504_988_610_744_476_318_359_4,
-0.074_890_092_015_266_418_457_031_2,
0.106_347_933_411_598_205_566_406,
-0.142_027_363_181_114_196_777_344,
0.199_926_957_488_059_997_558_594,
-0.333_331_018_686_294_555_664_062,
);
let t = s.mla(t * u, s);
q.cast::<f32>().mla(F32x::FRAC_PI_2, t)
}
pub fn atan2f<const N: usize>(y: F32x<N>, x: F32x<N>) -> F32x<N> {
let mut r = atan2kf(y.abs(), x);
r = r.mul_sign(x);
r = (x.is_infinite() | x.simd_eq(F32x::ZERO)).select(
F32x::FRAC_PI_2 - visinf2_vf_vf_vf(x, F32x::FRAC_PI_2.mul_sign(x)),
r,
);
r = y.is_infinite().select(
F32x::FRAC_PI_2 - visinf2_vf_vf_vf(x, F32x::FRAC_PI_4.mul_sign(x)),
r,
);
r = y.simd_eq(F32x::ZERO).select(
F32x::from_bits(x.is_sign_negative().to_int().cast() & F32x::PI.to_bits()),
r,
);
F32x::from_bits((x.is_nan() | y.is_nan()).to_int().cast() | r.mul_sign(y).to_bits())
}
#[test]
fn test_atan2f() {
test_ff_f::<2>(
atan2f,
rug::Float::atan2,
f32::MIN..=f32::MAX,
f32::MIN..=f32::MAX,
3.5,
);
}
pub fn asinf<const N: usize>(d: F32x<N>) -> F32x<N> {
let o = d.abs().simd_lt(F32x::HALF);
let x2 = o.select(d * d, (F32x::ONE - d.abs()) * F32x::HALF);
let x = o.select(d.abs(), x2.sqrt());
let u = F32x::splat(0.419_745_482_5_e-1)
.mla(x2, F32x::splat(0.242_404_602_5_e-1))
.mla(x2, F32x::splat(0.454_742_386_9_e-1))
.mla(x2, F32x::splat(0.749_502_927_1_e-1))
.mla(x2, F32x::splat(0.166_667_729_6))
.mla(x * x2, x);
let r = o.select(u, u.mla(F32x::splat(-2.), F32x::FRAC_PI_2));
r.mul_sign(d)
}
#[test]
fn test_asinf() {
test_f_f::<2>(asinf, rug::Float::asin, -1.0..=1.0, 3.5);
}
pub fn acosf<const N: usize>(d: F32x<N>) -> F32x<N> {
let o = d.abs().simd_lt(F32x::HALF);
let x2 = o.select(d * d, (F32x::ONE - d.abs()) * F32x::HALF);
let mut x = o.select(d.abs(), x2.sqrt());
x = d.abs().simd_eq(F32x::ONE).select(F32x::ZERO, x);
let u = F32x::splat(0.419_745_482_5_e-1)
.mla(x2, F32x::splat(0.242_404_602_5_e-1))
.mla(x2, F32x::splat(0.454_742_386_9_e-1))
.mla(x2, F32x::splat(0.749_502_927_1_e-1))
.mla(x2, F32x::splat(0.166_667_729_6))
* (x2 * x);
let y = F32x::FRAC_PI_2 - (x.mul_sign(d) + u.mul_sign(d));
x += u;
let r = o.select(y, x * F32x::splat(2.));
(!o & d.simd_lt(F32x::ZERO)).select(
Doubled::<F32x<N>>::splat(crate::f32::D_PI)
.add_checked(-r)
.0,
r,
)
}
#[test]
fn test_acosf() {
test_f_f::<2>(acosf, rug::Float::acos, -1.0..=1.0, 3.5);
}
pub fn atanf<const N: usize>(d: F32x<N>) -> F32x<N> {
let q = d.is_sign_negative().to_int() & I32x::splat(2);
let s = d.abs();
let q = F32x::ONE.simd_lt(s).select(q + I32x::splat(1), q);
let s = F32x::ONE.simd_lt(s).select(s.recip(), s);
let mut t = s * s;
let t2 = t * t;
let t4 = t2 * t2;
let u = F32x::poly8(
t,
t2,
t4,
0.002_823_638_962_581_753_730_773_93,
-0.015_956_902_876_496_315_002_441_4,
0.042_504_988_610_744_476_318_359_4,
-0.074_890_092_015_266_418_457_031_2,
0.106_347_933_411_598_205_566_406,
-0.142_027_363_181_114_196_777_344,
0.199_926_957_488_059_997_558_594,
-0.333_331_018_686_294_555_664_062,
);
t = s.mla(t * u, s);
t = (q & I32x::splat(1))
.simd_eq(I32x::splat(1))
.select(F32x::FRAC_PI_2 - t, t);
t = F32x::from_bits(
((q & I32x::splat(2)).simd_eq(I32x::splat(2)).to_int().cast() & F32x::NEG_ZERO.to_bits())
^ t.to_bits(),
);
if cfg!(feature = "enable_neon32") || cfg!(feature = "enable_neon32vfpv4") {
t = d.is_infinite().select(
F32x::splat(1.587_401_051_968_199_474_751_705_6).mul_sign(d),
t,
);
}
t
}
#[test]
fn test_atanf() {
test_f_f::<2>(atanf, rug::Float::atan, f32::MIN..=f32::MAX, 3.5);
}
pub fn sinhf<const N: usize>(x: F32x<N>) -> F32x<N> {
let e = expm1fk(x.abs());
let mut y = (e + F32x::splat(2.)) / (e + F32x::ONE);
y *= F32x::HALF * e;
y = (x.abs().simd_gt(F32x::splat(88.)) | y.is_nan()).select(F32x::INFINITY, y);
y = y.mul_sign(x);
F32x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_sinhf() {
test_f_f::<2>(sinhf, rug::Float::sinh, -88.0..=88.0, 3.5);
}
pub fn coshf<const N: usize>(x: F32x<N>) -> F32x<N> {
let e = u10::expf(x.abs());
let mut y = F32x::HALF.mla(e, F32x::HALF / e);
y = (x.abs().simd_gt(F32x::splat(88.)) | y.is_nan()).select(F32x::INFINITY, y);
F32x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_coshf() {
test_f_f::<2>(coshf, rug::Float::cosh, -88.0..=88.0, 3.5);
}
pub fn tanhf<const N: usize>(x: F32x<N>) -> F32x<N> {
let d = expm1fk(F32x::splat(2.) * x.abs());
let mut y = d / (F32x::splat(2.) + d);
y = (x.abs().simd_gt(F32x::splat(8.664_339_742)) | y.is_nan()).select(F32x::ONE, y);
y = y.mul_sign(x);
F32x::from_bits(x.is_nan().to_int().cast() | y.to_bits())
}
#[test]
fn test_tanhf() {
test_f_f::<2>(tanhf, rug::Float::tanh, -8.7..=8.7, 3.5);
}
pub fn logf<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let m;
let ef =
{
let o = d.simd_lt(F32x::splat(f32::MIN_POSITIVE));
d = o.select(d * (F32x::F1_32 * F32x::F1_32), d);
let mut e = ilogb2kf(d * F32x::splat(1. / 0.75));
m = ldexp3kf(d, -e);
e = o.select(e - I32x::splat(64), e);
e.cast()
};
let mut x = (m - F32x::ONE) / (F32x::ONE + m);
let x2 = x * x;
let t = F32x::splat(0.239_282_846_450_805_664_062_5)
.mla(x2, F32x::splat(0.285_182_118_415_832_519_531_25))
.mla(x2, F32x::splat(0.400_005_877_017_974_853_515_625))
.mla(x2, F32x::splat(0.666_666_686_534_881_591_796_875))
.mla(x2, F32x::splat(2.));
x = x.mla(t, F32x::splat(0.693_147_180_559_945_286_226_764) * ef);
x = d.simd_eq(F32x::INFINITY).select(F32x::INFINITY, x);
x = (d.simd_lt(F32x::ZERO) | d.is_nan()).select(F32x::NAN, x);
d.simd_eq(F32x::ZERO).select(F32x::NEG_INFINITY, x)
}
#[test]
fn test_logf() {
test_f_f::<2>(logf, rug::Float::ln, 0.0..=f32::MAX, 3.5);
}
pub fn log2f<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let (m, e) = {
let o = d.simd_lt(F32x::splat(f32::MIN_POSITIVE));
d = o.select(d * (F32x::F1_32 * F32x::F1_32), d);
let e = ilogb2kf(d * F32x::splat(1./0.75));
(ldexp3kf(d, -e), o.select(e - I32x::splat(64), e))
};
let x = (m - F32x::ONE) / (m + F32x::ONE);
let x2 = x * x;
let t = F32x::splat(0.437_408_834_7)
.mla(x2, F32x::splat(0.576_484_382_2))
.mla(x2, F32x::splat(0.961_802_423));
{
let mut r = (x2 * x).mla(t, x.mla(F32x::splat(0.288_539_004_3_e+1), e.cast()));
r = d.simd_eq(F32x::INFINITY).select(F32x::INFINITY, r);
r = (d.simd_lt(F32x::ZERO) | d.is_nan()).select(F32x::NAN, r);
d.simd_eq(F32x::ZERO).select(F32x::NEG_INFINITY, r)
}
}
#[test]
fn test_log2f() {
test_f_f::<2>(log2f, rug::Float::log2, 0.0..=f32::MAX, 3.5);
}
pub fn exp10f<const N: usize>(d: F32x<N>) -> F32x<N> {
let mut u = (d * F32x::LOG10_2).round();
let q = u.roundi();
let mut s = u.mla(-F32x::L10_U, d);
s = u.mla(-F32x::L10_L, s);
u = F32x::splat(0.206_400_498_7)
.mla(s, F32x::splat(0.541_787_743_6))
.mla(s, F32x::splat(0.117_128_682_1_e+1))
.mla(s, F32x::splat(0.203_465_604_8_e+1))
.mla(s, F32x::splat(0.265_094_876_3_e+1))
.mla(s, F32x::splat(0.230_258_512_5_e+1))
.mla(s, F32x::splat(0.1_e+1));
u = ldexp2kf(u, q);
u = d
.simd_gt(F32x::splat(38.531_839_419_103_623_894_138_7))
.select(F32x::INFINITY, u);
F32x::from_bits(!d.simd_lt(F32x::splat(-50.)).to_int().cast::<u32>() & u.to_bits())
}
#[test]
fn test_exp10f() {
test_f_f::<2>(exp10f, rug::Float::exp10, -38.54..=38.54, 3.5);
}
pub fn exp2f<const N: usize>(d: F32x<N>) -> F32x<N> {
let mut u = d.round();
let q = u.roundi();
let s = d - u;
u = F32x::splat(0.153_592_089_2_e-3)
.mla(s, F32x::splat(0.133_926_270_1_e-2))
.mla(s, F32x::splat(0.961_838_476_4_e-2))
.mla(s, F32x::splat(0.555_034_726_9_e-1))
.mla(s, F32x::splat(0.240_226_447_6))
.mla(s, F32x::splat(0.693_147_182_5))
.mla(s, F32x::splat(0.1_e+1));
u = ldexp2kf(u, q);
u = d.simd_ge(F32x::splat(128.)).select(F32x::INFINITY, u);
F32x::from_bits(!d.simd_lt(F32x::splat(-150.)).to_int().cast::<u32>() & u.to_bits())
}
#[test]
fn test_exp2f() {
test_f_f::<2>(exp2f, rug::Float::exp2, -150.0..=128.0, 3.5);
}
#[cfg(all(
not(feature = "enable_neon32"),
not(feature = "enable_neon32vfpv4"),
// not(feature = "enable_vecext")
))]
pub fn sqrtf<const N: usize>(d: F32x<N>) -> F32x<N> {
d.sqrt()
}
#[test]
fn test_sqrtf() {
test_f_f::<2>(sqrtf, rug::Float::sqrt, 0.0..=f32::MAX, 3.5);
}
pub fn cbrtf<const N: usize>(mut d: F32x<N>) -> F32x<N> {
let mut q = F32x::ONE;
let e = ilogbkf(d.abs()) + I32x::splat(1);
d = ldexp2kf(d, -e);
let t = e.cast::<f32>() + F32x::splat(6144.);
let qu = (t * F32x::splat(1. / 3.)).trunci();
let re = (t - qu.cast::<f32>() * F32x::splat(3.)).trunci();
q = re
.simd_eq(I32x::splat(1))
.select(F32x::splat(1.259_921_049_894_873_164_767_210_6), q);
q = re
.simd_eq(I32x::splat(2))
.select(F32x::splat(1.587_401_051_968_199_474_751_705_6), q);
q = ldexp2kf(q, qu - I32x::splat(2048));
q = q.mul_sign(d);
d = d.abs();
let x = F32x::splat(-0.601_564_466_953_277_587_890_625)
.mla(d, F32x::splat(2.820_889_234_542_846_679_687_5))
.mla(d, F32x::splat(-5.532_182_216_644_287_109_375))
.mla(d, F32x::splat(5.898_262_500_762_939_453_125))
.mla(d, F32x::splat(-3.809_541_702_270_507_812_5))
.mla(d, F32x::splat(2.224_125_623_703_002_929_687_5));
let mut y = d * x * x;
y = (y - F32x::splat(2. / 3.) * y * y.mla(x, F32x::splat(-1.))) * q;
y
}
#[test]
fn test_cbrtf() {
test_f_f::<2>(cbrtf, rug::Float::cbrt, f32::MIN..=f32::MAX, 3.5);
}
pub fn hypotf<const N: usize>(x: F32x<N>, y: F32x<N>) -> F32x<N> {
let x = x.abs();
let y = y.abs();
let min = x.simd_min(y);
let max = x.simd_max(y);
let t = min / max;
let mut ret = max * t.mla(t, F32x::ONE).sqrt();
ret = min.simd_eq(F32x::ZERO).select(max, ret);
ret = (x.is_nan() | y.is_nan()).select(F32x::NAN, ret);
(x.simd_eq(F32x::INFINITY) | y.simd_eq(F32x::INFINITY)).select(F32x::INFINITY, ret)
}
#[test]
fn test_hypotf() {
test_ff_f::<2>(
hypotf,
rug::Float::hypot,
f32::MIN..=f32::MAX,
f32::MIN..=f32::MAX,
3.5,
);
}