use crate::common::{dd_fmla, is_integerf};
use crate::double_double::DoubleDouble;
use crate::rounding::CpuRoundTiesEven;
use std::hint::black_box;
#[cold]
#[inline(never)]
fn as_compoundf_special(x: f32, y: f32) -> f32 {
let nx = x.to_bits();
let ny = y.to_bits();
let ax: u32 = nx.wrapping_shl(1);
let ay: u32 = ny.wrapping_shl(1);
if ax == 0 || ay == 0 {
if ax == 0 {
return if y.is_nan() { x + y } else { 1.0 };
}
if ay == 0 {
if x.is_nan() {
return x + y;
} return if x < -1.0 {
f32::NAN } else {
1.0
}; }
}
let mone = (-1.0f32).to_bits();
if ay >= 0xffu32 << 24 {
if ax > 0xffu32 << 24 {
return x + y;
} if ay == 0xffu32 << 24 {
if nx > mone {
return f32::NAN;
} let sy = ny >> 31; if nx == mone {
return if sy == 0 {
0.0 } else {
f32::INFINITY };
}
if x < 0.0 {
return if sy == 0 { 0.0 } else { f32::INFINITY };
}
if x > 0.0 {
return if sy != 0 { 0.0 } else { f32::INFINITY };
}
return 1.0;
}
return x + y; }
if nx >= mone || nx >= 0xffu32 << 23 {
if ax == 0xffu32 << 24 {
if (nx >> 31) != 0 {
return f32::NAN;
} return if (ny >> 31) != 0 { 1.0 / x } else { x };
}
if ax > 0xffu32 << 24 {
return x + y;
} if nx > mone {
return f32::NAN; }
return if (ny >> 31) != 0 {
f32::INFINITY
} else {
0.0
};
}
0.0
}
#[inline]
pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
const P: [u64; 8] = [
0x0000000000000000,
0x3ff71547652b82fe,
0xbfe71547652b8d11,
0x3fdec709dc3a5014,
0xbfd715475b144983,
0x3fd2776c3fda300e,
0xbfcec990162358ce,
0x3fca645337c29e27,
];
let z2 = z * z;
let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
let z4 = z2 * z2;
c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
c1 = dd_fmla(c3, z2, c1);
c1 = dd_fmla(c5, z4, c1);
z * c1
}
pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
0x3ff6800000000000,
0x3ff6000000000000,
0x3ff5800000000000,
0x3ff5000000000000,
0x3ff4c00000000000,
0x3ff4400000000000,
0x3ff4000000000000,
0x3ff3800000000000,
0x3ff3400000000000,
0x3ff2c00000000000,
0x3ff2800000000000,
0x3ff2000000000000,
0x3ff1c00000000000,
0x3ff1800000000000,
0x3ff1400000000000,
0x3ff1000000000000,
0x3ff0c00000000000,
0x3ff0800000000000,
0x3ff0000000000000,
0x3ff0000000000000,
0x3fef400000000000,
0x3feec00000000000,
0x3fee400000000000,
0x3fee000000000000,
0x3fed800000000000,
0x3fed000000000000,
0x3feca00000000000,
0x3fec400000000000,
0x3febe00000000000,
0x3feb800000000000,
0x3feb200000000000,
0x3feac00000000000,
0x3fea800000000000,
0x3fea200000000000,
0x3fe9c00000000000,
0x3fe9800000000000,
0x3fe9200000000000,
0x3fe8c00000000000,
0x3fe8800000000000,
0x3fe8400000000000,
0x3fe8000000000000,
0x3fe7c00000000000,
0x3fe7600000000000,
0x3fe7200000000000,
0x3fe6e00000000000,
0x3fe6a00000000000,
];
pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
(0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf),
(0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
(0x3c76fae441c09d76, 0xbfdb47ebf73882a1),
(0x3c72d352bea51e59, 0xbfd91bba891f1709),
(0xbc69575b04fa6fbd, 0xbfd800a563161c54),
(0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
(0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b),
(0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
(0x3c6a7b47d2c352d9, 0xbfd11307dad30b76),
(0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
(0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94),
(0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
(0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a),
(0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
(0xbc58ecb169b9465f, 0xbfbbc84240adabba),
(0xbc5f3314e0985116, 0xbfb663f6fac91316),
(0x3c530c22d15199b8, 0xbfb0eb389fa29f9b),
(0xbc389b03784b5be1, 0xbfa6bad3758efd87),
(0x0000000000000000, 0x0000000000000000),
(0x0000000000000000, 0x0000000000000000),
(0x3c3491f06c085bc2, 0x3fa184b8e4c56af8),
(0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
(0x3c2c141e66faaaad, 0x3fb4c560fe68af88),
(0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
(0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56),
(0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
(0xbc61562d61af73f8, 0x3fc494f863b8df35),
(0xbc60798d1aa21694, 0x3fc7046031c79f85),
(0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1),
(0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
(0xbc6086fce864a1f6, 0x3fce857d3d361368),
(0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
(0x3c7c8d43e017579b, 0x3fd169c05363f158),
(0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
(0xbc7c658d602e66b0, 0x3fd4106017c3eca3),
(0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
(0x3c7ac9080333c605, 0x3fd6552b49986277),
(0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
(0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32),
(0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
(0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e),
(0xbc501d98c3531027, 0x3fdb877c57b1b070),
(0x3c78a38b4175d665, 0x3fdcffae611ad12b),
(0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
(0x3c76d261f1753e0b, 0x3fdefec61b011f85),
(0xbc87398fe685f171, 0x3fe0014332be0033),
];
#[inline]
fn compoundf_expf_poly(z: f64) -> f64 {
const Q: [u64; 5] = [
0x3ff0000000000000,
0x3fe62e42fef6d01a,
0x3fcebfbdff7feeba,
0x3fac6b167e579bee,
0x3f83b2b3428d06de,
];
let z2 = z * z;
let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
dd_fmla(c2, z2, c0)
}
pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
let u = 1.0 + x;
let mut v = u.to_bits();
let m: u64 = v & 0xfffffffffffffu64;
let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
v = v.wrapping_sub((e << 52) as u64);
let t = f64::from_bits(v);
v = (f64::from_bits(v) + 2.0).to_bits(); let i = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
let z = dd_fmla(r, t, -1.0); let p = log2p1_polyeval_1(z);
e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
}
pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
0xbfe0000000000000,
0xbfde000000000000,
0xbfdc000000000000,
0xbfda000000000000,
0xbfd8000000000000,
0xbfd6000000000000,
0xbfd4000000000000,
0xbfd2000000000000,
0xbfd0000000000000,
0xbfcc000000000000,
0xbfc8000000000000,
0xbfc4000000000000,
0xbfc0000000000000,
0xbfb8000000000000,
0xbfb0000000000000,
0xbfa0000000000000,
0x0000000000000000,
0x3fa0000000000000,
0x3fb0000000000000,
0x3fb8000000000000,
0x3fc0000000000000,
0x3fc4000000000000,
0x3fc8000000000000,
0x3fcc000000000000,
0x3fd0000000000000,
0x3fd2000000000000,
0x3fd4000000000000,
0x3fd6000000000000,
0x3fd8000000000000,
0x3fda000000000000,
0x3fdc000000000000,
0x3fde000000000000,
0x3fe0000000000000,
];
pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
(0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd),
(0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
(0xbc741577ee04992f, 0x3fe7a11473eb0187),
(0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
(0x3c86e9f156864b27, 0x3fe8ace5422aa0db),
(0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
(0x3c6c7c46b071f2be, 0x3fe9c49182a3f090),
(0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
(0x3c87a1cd345dcc81, 0x3feae89f995ad3ad),
(0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
(0x3c711065895048dd, 0x3fec199bdd85529c),
(0x3c6503cbd1e949db, 0x3fecb720dcef9069),
(0x3c72ed02d75b3707, 0x3fed5818dcfba487),
(0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
(0xbc8e9c23179c2893, 0x3feea4afa2a490da),
(0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
(0x0000000000000000, 0x3ff0000000000000),
(0x3c8d73e2a475b465, 0x3ff059b0d3158574),
(0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
(0xbc96c51039449b3a, 0x3ff11301d0125b51),
(0xbc819041b9d78a76, 0x3ff172b83c7d517b),
(0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
(0x3c99b07eb6c70573, 0x3ff2387a6e756238),
(0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
(0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
(0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
(0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
(0x3c489b7a04ef80d0, 0x3ff44e086061892d),
(0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
(0xbc807abe1db13cad, 0x3ff5342b569d4f82),
(0x3c96324c054647ad, 0x3ff5ab07dd485429),
(0xbc9383c17e40b497, 0x3ff6247eb03a5585),
(0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
];
fn exp2_fast(t: f64) -> f64 {
let k = t.cpu_round_ties_even(); let mut r = t - k; let mut v: u64 = (3.015625 + r).to_bits(); let i: i32 = (v >> 46) as i32 - 0x10010; r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
let mut err: u64 = 0x3d61d00000000000; let ik = k as i64;
v = v.wrapping_add(ik.wrapping_shl(52) as u64);
if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
return -1.0;
}
err = err.wrapping_add((ik << 52) as u64); let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
if lb != rb {
return -1.0;
}
f64::from_bits(v)
}
pub(crate) static LOG2P1_SCALE: [u64; 158] = [
0x41c0000000000000,
0x41b0000000000000,
0x41a0000000000000,
0x4190000000000000,
0x4180000000000000,
0x4170000000000000,
0x4160000000000000,
0x4150000000000000,
0x4140000000000000,
0x4130000000000000,
0x4120000000000000,
0x4110000000000000,
0x4100000000000000,
0x40f0000000000000,
0x40e0000000000000,
0x40d0000000000000,
0x40c0000000000000,
0x40b0000000000000,
0x40a0000000000000,
0x4090000000000000,
0x4080000000000000,
0x4070000000000000,
0x4060000000000000,
0x4050000000000000,
0x4040000000000000,
0x4030000000000000,
0x4020000000000000,
0x4010000000000000,
0x4000000000000000,
0x3ff0000000000000,
0x3fe0000000000000,
0x3fd0000000000000,
0x3fc0000000000000,
0x3fb0000000000000,
0x3fa0000000000000,
0x3f90000000000000,
0x3f80000000000000,
0x3f70000000000000,
0x3f60000000000000,
0x3f50000000000000,
0x3f40000000000000,
0x3f30000000000000,
0x3f20000000000000,
0x3f10000000000000,
0x3f00000000000000,
0x3ef0000000000000,
0x3ee0000000000000,
0x3ed0000000000000,
0x3ec0000000000000,
0x3eb0000000000000,
0x3ea0000000000000,
0x3e90000000000000,
0x3e80000000000000,
0x3e70000000000000,
0x3e60000000000000,
0x3e50000000000000,
0x3e40000000000000,
0x3e30000000000000,
0x3e20000000000000,
0x3e10000000000000,
0x3e00000000000000,
0x3df0000000000000,
0x3de0000000000000,
0x3dd0000000000000,
0x3dc0000000000000,
0x3db0000000000000,
0x3da0000000000000,
0x3d90000000000000,
0x3d80000000000000,
0x3d70000000000000,
0x3d60000000000000,
0x3d50000000000000,
0x3d40000000000000,
0x3d30000000000000,
0x3d20000000000000,
0x3d10000000000000,
0x3d00000000000000,
0x3cf0000000000000,
0x3ce0000000000000,
0x3cd0000000000000,
0x3cc0000000000000,
0x3cb0000000000000,
0x3ca0000000000000,
0x3c90000000000000,
0x3c80000000000000,
0x3c70000000000000,
0x3c60000000000000,
0x3c50000000000000,
0x3c40000000000000,
0x3c30000000000000,
0x3c20000000000000,
0x3c10000000000000,
0x3c00000000000000,
0x3bf0000000000000,
0x3be0000000000000,
0x3bd0000000000000,
0x3bc0000000000000,
0x3bb0000000000000,
0x3ba0000000000000,
0x3b90000000000000,
0x3b80000000000000,
0x3b70000000000000,
0x3b60000000000000,
0x3b50000000000000,
0x3b40000000000000,
0x3b30000000000000,
0x3b20000000000000,
0x3b10000000000000,
0x3b00000000000000,
0x3af0000000000000,
0x3ae0000000000000,
0x3ad0000000000000,
0x3ac0000000000000,
0x3ab0000000000000,
0x3aa0000000000000,
0x3a90000000000000,
0x3a80000000000000,
0x3a70000000000000,
0x3a60000000000000,
0x3a50000000000000,
0x3a40000000000000,
0x3a30000000000000,
0x3a20000000000000,
0x3a10000000000000,
0x3a00000000000000,
0x39f0000000000000,
0x39e0000000000000,
0x39d0000000000000,
0x39c0000000000000,
0x39b0000000000000,
0x39a0000000000000,
0x3990000000000000,
0x3980000000000000,
0x3970000000000000,
0x3960000000000000,
0x3950000000000000,
0x3940000000000000,
0x3930000000000000,
0x3920000000000000,
0x3910000000000000,
0x3900000000000000,
0x38f0000000000000,
0x38e0000000000000,
0x38d0000000000000,
0x38c0000000000000,
0x38b0000000000000,
0x38a0000000000000,
0x3890000000000000,
0x3880000000000000,
0x3870000000000000,
0x3860000000000000,
0x3850000000000000,
0x3840000000000000,
0x3830000000000000,
0x3820000000000000,
0x3810000000000000,
0x3800000000000000,
0x37f0000000000000,
];
static LOG2P1_LOG2_POLY: [u64; 18] = [
0x3ff71547652b82fe,
0x3c7777d0ffda0d80,
0xbfe71547652b82fe,
0xbc6777d0fd20b49c,
0x3fdec709dc3a03fd,
0x3c7d27f05171b74a,
0xbfd71547652b82fe,
0xbc57814e70b828b0,
0x3fd2776c50ef9bfe,
0x3c7e4f63e12bff83,
0xbfcec709dc3a03f4,
0x3fca61762a7adecc,
0xbfc71547652d8849,
0x3fc484b13d7e7029,
0xbfc2776c1b2a40fd,
0x3fc0c9a80f9b7c1c,
0xbfbecc6801121200,
0x3fbc6e4b91fd10e5,
];
fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]);
for i in 7..=12 {
h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
}
let mut v = DoubleDouble::quick_mult_f64(z, h);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
v.hi = t.hi;
v.lo += t.lo;
v = DoubleDouble::quick_mult(v, z);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
v.hi = t.hi;
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);
v = DoubleDouble::quick_mult(v, z);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
v.hi = t.hi;
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);
v = DoubleDouble::quick_mult(v, z);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
v.hi = t.hi;
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);
v = DoubleDouble::quick_mult(v, z);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
v.hi = t.hi;
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);
v = DoubleDouble::quick_mult(v, z);
let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
v.hi = t.hi;
v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);
v = DoubleDouble::quick_mult(v, z);
v
}
pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
let mut v_dd = if 1.0 >= x {
if (x as f32).abs() >= f32::from_bits(0x25000000) {
DoubleDouble::from_exact_add(1.0, x)
} else {
DoubleDouble::new(x, 1.0)
}
} else {
DoubleDouble::from_exact_add(x, 1.0)
};
let mut v = v_dd.hi.to_bits();
let m = v & 0xfffffffffffffu64;
let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
v_dd.hi *= scale;
v_dd.lo *= scale;
v = (2.0 + v_dd.hi).to_bits(); let i: i32 = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
let log_p = log2_poly2(z_dd);
let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
v_dd.lo += f64::from_bits(log2_inv.0);
let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
p.lo += v_dd.lo + log_p.lo;
p
}
pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
static Q2: [u64; 12] = [
0x3ff0000000000000,
0x3fe62e42fefa39ef,
0x3c7abc9d45534d06,
0x3fcebfbdff82c58f,
0xbc65e4383cf9ddf7,
0x3fac6b08d704a0c0,
0xbc46cbc55586c8f1,
0x3f83b2ab6fba4e77,
0x3f55d87fe789aec5,
0x3f2430912f879daa,
0x3eeffcc774b2367a,
0x3eb62c017b9bdfe6,
];
let h2 = z.hi * z.hi;
let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
c5 = dd_fmla(c7, h2, c5);
let z_vqh = c5 * z.hi;
let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
q = DoubleDouble::quick_mult(q, z);
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
q.hi = t.hi;
q.lo += t.lo + f64::from_bits(Q2[6]);
q = DoubleDouble::quick_mult(q, z);
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
q.hi = t.hi;
q.lo += t.lo + f64::from_bits(Q2[4]);
q = DoubleDouble::quick_mult(q, z);
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
q.hi = t.hi;
q.lo += t.lo + f64::from_bits(Q2[2]);
q = DoubleDouble::quick_mult(q, z);
let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
q.hi = t.hi;
q.lo += t.lo;
q
}
fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
if y == 1.0 {
let res = 1.0 + x;
return res;
}
let k = x_dd.hi.cpu_round_ties_even();
if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
return (1.0 + x_dd.hi * 0.5) as f32;
}
let r = x_dd.hi - k; let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
let mut v = (3.015625 + v_dd.hi).to_bits(); let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]);
v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
let q = compoundf_exp2_poly2(v_dd);
let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
let mut q = DoubleDouble::quick_mult(exp2u, q);
q = DoubleDouble::from_exact_add(q.hi, q.lo);
let mut w = q.hi.to_bits();
if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
let err = f64::from_bits(ERR[small as usize]);
w = (q.hi + (q.lo + err)).to_bits();
w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
}
v = (q.hi + q.lo).to_bits();
if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); }
v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
f64::from_bits(v) as f32
}
#[cold]
#[inline(never)]
fn compoundf_accurate(x: f32, y: f32) -> f32 {
let mut v = compoundf_log2p1_accurate(x as f64);
v = DoubleDouble::quick_mult_f64(v, y as f64);
compoundf_exp2_accurate(v, x, y)
}
#[inline]
pub fn f_compoundf(x: f32, y: f32) -> f32 {
let mone = (-1.0f32).to_bits();
let nx = x.to_bits();
let ny = y.to_bits();
if nx >= mone {
return as_compoundf_special(x, y);
}
let ax: u32 = nx.wrapping_shl(1);
let ay: u32 = ny.wrapping_shl(1);
if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
return as_compoundf_special(x, y);
}
if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
if ax <= 0x62000000u32 {
return 1.0 + y * x;
} let mut s = x as f64 + 1.;
let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
let mut acc = if iter_count % 2 != 0 { s } else { 1. };
while {
iter_count >>= 1;
iter_count
} != 0
{
s = s * s;
if iter_count % 2 != 0 {
acc *= s;
}
}
let dz = if y.is_sign_negative() { 1. / acc } else { acc };
return dz as f32;
}
let xd = x as f64;
let yd = y as f64;
let tx = xd.to_bits();
let ty = yd.to_bits();
let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));
let t: u64 = (l * f64::from_bits(ty)).to_bits();
if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
if t >= 0x3018bu64 << 46 {
return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
} else if (t >> 63) == 0 {
return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
}
}
if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
return if (t >> 63) != 0 {
black_box(1.0) - black_box(f32::from_bits(0x33000000))
} else {
black_box(1.0) + black_box(f32::from_bits(0x33000000))
};
}
let res = exp2_fast(f64::from_bits(t));
if res != -1.0 {
return res as f32;
}
compoundf_accurate(x, y)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_compoundf() {
assert_eq!(
f_compoundf(
0.000000000000000000000000000000000000011754944,
-170502050000000000000000000000000000000.
),
1.
);
assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
assert_eq!(f_compoundf(2., 3.0), 27.);
assert!(f_compoundf(-2., 5.0).is_nan());
assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
}
}