use crate::common::*;
use crate::compound::compoundf::{
COMPOUNDF_EXP2_T, COMPOUNDF_EXP2_U, compoundf_exp2_poly2, compoundf_log2p1_accurate,
compoundf_log2p1_fast,
};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1_accurate_tiny;
use crate::rounding::CpuRoundTiesEven;
use std::hint::black_box;
const INVLOG2: f64 = f64::from_bits(0x3ff71547652b82fe);
#[cold]
#[inline(never)]
fn as_compoundm1f_special(x: f32, y: f32) -> f32 {
let nx = x.to_bits();
let ny = y.to_bits();
let ax: u32 = nx.wrapping_shl(1);
let ay: u32 = ny.wrapping_shl(1);
if ax == 0 || ay == 0 {
if ax == 0 {
return if y.is_nan() { x + y } else { 0.0 };
}
if ay == 0 {
if x.is_nan() {
return x + y;
} return if x < -1.0 {
f32::NAN } else {
0.0
}; }
}
let mone = (-1.0f32).to_bits();
if ay >= 0xffu32 << 24 {
if ax > 0xffu32 << 24 {
return x + y;
} if ay == 0xffu32 << 24 {
if nx > mone {
return f32::NAN;
} let sy = ny >> 31; if nx == mone {
return if sy == 0 {
-1. } else {
f32::INFINITY };
}
if x < 0.0 {
return if sy == 0 { -1. } else { f32::INFINITY };
}
if x > 0.0 {
return if sy != 0 { -1. } else { f32::INFINITY };
}
return 0.0;
}
return x + y; }
if nx >= mone || nx >= 0xffu32 << 23 {
if ax == 0xffu32 << 24 {
if (nx >> 31) != 0 {
return f32::NAN;
} return (if (ny >> 31) != 0 { 1.0 / x } else { x }) - 1.;
}
if ax > 0xffu32 << 24 {
return x + y;
} if nx > mone {
return f32::NAN; }
return if (ny >> 31) != 0 {
f32::INFINITY
} else {
-1.0
};
}
-1.
}
#[inline]
pub(crate) fn compoundf_expf_poly(z: f64) -> f64 {
const Q: [u64; 5] = [
0x3fe62e42fefa39ef,
0x3fcebfbdff8098eb,
0x3fac6b08d7045dc3,
0x3f83b2b276ce985d,
0x3f55d8849c67ace4,
];
let z2 = z * z;
let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
dd_fmla(c2, z2, c0) * z
}
fn exp2m1_fast(t: f64) -> f64 {
let k = t.cpu_round_ties_even(); let mut r = t - k; let mut v: u64 = (3.015625 + r).to_bits(); let i: i32 = (v >> 46) as i32 - 0x10010; r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); let mut s = f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1);
let su = unsafe { ((k.to_int_unchecked::<i64>() as u64).wrapping_add(0x3ffu64)) << 52 }; s *= f64::from_bits(su);
let q_poly = compoundf_expf_poly(r);
v = q_poly.to_bits();
let mut err: u64 = 0x3d61d00000000000;
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
))]
{
v = f_fmla(f64::from_bits(v), s, s - 1f64).to_bits();
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
)))]
{
let p0 = DoubleDouble::from_full_exact_add(s, -1.);
let z = DoubleDouble::from_exact_mult(f64::from_bits(v), s);
v = DoubleDouble::add(z, p0).to_f64().to_bits();
}
if f64::from_bits(v) < f64::from_bits(0x3d61d00000000000) {
return -1.0;
}
err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
if lb != rb {
return -1.0;
}
f64::from_bits(v)
}
fn compoundf_exp2m1_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
if y == 1.0 {
let res = x;
return res;
}
let k = x_dd.hi.cpu_round_ties_even();
if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
use crate::exponents::GenericExpfBackend;
return exp2m1_accurate_tiny(x_dd.to_f64(), &GenericExpfBackend {}) as f32;
}
let r = x_dd.hi - k; let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
let mut v = (3.015625 + v_dd.hi).to_bits(); let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]);
v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
let q = compoundf_exp2_poly2(v_dd);
let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
let mut q = DoubleDouble::quick_mult(exp2u, q);
q = DoubleDouble::from_exact_add(q.hi, q.lo);
let mut du = unsafe {
k.to_int_unchecked::<i64>()
.wrapping_add(0x3ff)
.wrapping_shl(52) as u64
};
du = f64::from_bits(du).to_bits();
let scale = f64::from_bits(du);
q.hi *= scale;
q.lo *= scale;
let zf: DoubleDouble = DoubleDouble::from_full_exact_add(q.hi, -1.0);
q.lo += zf.lo;
q.hi = zf.hi;
v = q.to_f64().to_bits();
f64::from_bits(v) as f32
}
#[cold]
#[inline(never)]
fn compoundm1f_accurate(x: f32, y: f32) -> f32 {
let mut v = compoundf_log2p1_accurate(x as f64);
v = DoubleDouble::quick_mult_f64(v, y as f64);
compoundf_exp2m1_accurate(v, x, y)
}
#[inline]
pub fn f_compound_m1f(x: f32, y: f32) -> f32 {
let mone = (-1.0f32).to_bits();
let nx = x.to_bits();
let ny = y.to_bits();
if nx >= mone {
return as_compoundm1f_special(x, y);
}
let ax: u32 = nx.wrapping_shl(1);
let ay: u32 = ny.wrapping_shl(1);
if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
return as_compoundm1f_special(x, y);
}
if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
if ax <= 0x62000000u32 {
return 1.0 + y * x;
} let mut s = x as f64 + 1.;
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
let mut acc = if iter_count % 2 != 0 { s } else { 1. };
while {
iter_count >>= 1;
iter_count
} != 0
{
s = s * s;
if iter_count % 2 != 0 {
acc *= s;
}
}
let dz = if y.is_sign_negative() { 1. / acc } else { acc };
return DoubleDouble::from_full_exact_add(dz, -1.).to_f64() as f32;
}
let xd = x as f64;
let yd = y as f64;
let tx = xd.to_bits();
let ty = yd.to_bits();
let l: f64 = if ax < 0x62000000u32 {
let t = xd - (xd * xd) * 0.5;
INVLOG2 * t
} else {
compoundf_log2p1_fast(f64::from_bits(tx))
};
let t: u64 = (l * f64::from_bits(ty)).to_bits();
if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
if t >= 0x3018bu64 << 46 {
return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
} else if (t >> 63) == 0 {
return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
}
}
let res = exp2m1_fast(f64::from_bits(t));
if res != -1.0 {
return res as f32;
}
compoundm1f_accurate(x, y)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::compound::compound_m1f::{compoundf_exp2m1_accurate, exp2m1_fast};
use crate::double_double::DoubleDouble;
#[test]
fn test_compoundf() {
assert_eq!(
f_compound_m1f(-0.000000000000001191123, -0.000000000000001191123),
0.0000000000000000000000000000014187741
);
assert_eq!(f_compound_m1f(-0.000000000000001191123, 16.), 1.0);
assert_eq!(f_compound_m1f(0.91123, 16.), 31695.21);
assert_eq!(f_compound_m1f(0.91123, -16.), -0.99996847);
}
#[test]
fn test_compoundf_expm1_fast() {
assert_eq!(exp2m1_fast(3.764), 12.585539943149435);
}
#[test]
fn test_compoundf_expm1_accurate() {
assert_eq!(
compoundf_exp2m1_accurate(DoubleDouble::new(0., 2.74), 12., 53.),
5.680703,
);
}
}