use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::exponents::exp2m1::{EXP_M1_2_TABLE1, EXP_M1_2_TABLE2};
use crate::rounding::CpuFloor;
use crate::rounding::CpuRoundTiesEven;
const LN10H: f64 = f64::from_bits(0x40026bb1bbb55516);
const LN10L: f64 = f64::from_bits(0xbcaf48ad494ea3e9);
struct Exp10m1 {
exp: DoubleDouble,
err: f64,
}
#[inline]
fn q_1(dz: DoubleDouble) -> DoubleDouble {
const Q_1: [u64; 5] = [
0x3ff0000000000000,
0x3ff0000000000000,
0x3fe0000000000000,
0x3fc5555555995d37,
0x3fa55555558489dc,
];
let z = dz.to_f64();
let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3]));
q = f_fmla(q, z, f64::from_bits(Q_1[2]));
let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z);
p0 = DoubleDouble::quick_mult(dz, p0);
p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0);
p0
}
#[inline]
fn exp1(x: DoubleDouble) -> DoubleDouble {
const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
let k = (x.hi * INVLOG2).cpu_round_ties_even();
const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
let mut zk = DoubleDouble::from_exact_mult(LOG2H, k);
zk.lo = f_fmla(k, LOG2L, zk.lo);
let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo);
yz.lo -= zk.lo;
let ik: i64 = unsafe { k.to_int_unchecked::<i64>() };
let im: i64 = (ik >> 12).wrapping_add(0x3ff);
let i2: i64 = (ik >> 6) & 0x3f;
let i1: i64 = ik & 0x3f;
let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
let p0 = DoubleDouble::quick_mult(t2, t1);
let mut q = q_1(yz);
q = DoubleDouble::quick_mult(p0, q);
let mut du = (im as u64).wrapping_shl(52);
if im == 0x7ff {
q.hi *= 2.0;
q.lo *= 2.0;
du = (im.wrapping_sub(1) as u64).wrapping_shl(52);
}
q.hi *= f64::from_bits(du);
q.lo *= f64::from_bits(du);
q
}
#[inline]
fn exp10m1_fast(x: f64, tiny: bool) -> Exp10m1 {
if tiny {
return exp10m1_fast_tiny(x);
}
let v = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
let mut p = exp1(v);
let zf: DoubleDouble = if x >= 0. {
DoubleDouble::from_exact_add(p.hi, -1.0)
} else {
DoubleDouble::from_exact_add(-1.0, p.hi)
};
p.lo += zf.lo;
p.hi = zf.hi;
Exp10m1 {
exp: p,
err: f64::from_bits(0x3b77a00000000000) * p.hi,
}
}
#[inline]
fn q_2(dz: DoubleDouble) -> DoubleDouble {
const Q_2: [u64; 9] = [
0x3ff0000000000000,
0x3ff0000000000000,
0x3fe0000000000000,
0x3fc5555555555555,
0x3c655555555c4d26,
0x3fa5555555555555,
0x3f81111111111111,
0x3f56c16c3fbb4213,
0x3f2a01a023ede0d7,
];
let z = dz.to_f64();
let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7]));
q = dd_fmla(q, z, f64::from_bits(Q_2[6]));
q = dd_fmla(q, z, f64::from_bits(Q_2[5]));
let mut p = DoubleDouble::from_exact_mult(q, z);
let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi);
p.hi = r0.hi;
p.lo += r0.lo + f64::from_bits(Q_2[4]);
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
p = DoubleDouble::quick_mult(p, dz);
let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi);
p.hi = r1.hi;
p.lo += r1.lo;
p
}
#[inline]
fn exp_2(x: f64) -> DoubleDouble {
let mut k = (x * f64::from_bits(0x40ca934f0979a371)).cpu_round_ties_even();
if k == 4194304. {
k = 4194303.; }
const LOG2_10H: f64 = f64::from_bits(0x3f134413509f79ff);
const LOG2_10M: f64 = f64::from_bits(0xbb89dc1da9800000);
const LOG2_10L: f64 = f64::from_bits(0xb984fd20dba1f655);
let yhh = dd_fmla(-k, LOG2_10H, x);
let mut ky0 = DoubleDouble::from_exact_add(yhh, -k * LOG2_10M);
ky0.lo = dd_fmla(-k, LOG2_10L, ky0.lo);
let ky = DoubleDouble::quick_mult(ky0, DoubleDouble::new(LN10L, LN10H));
let ik = unsafe {
k.to_int_unchecked::<i64>() };
let im = (ik >> 12).wrapping_add(0x3ff);
let i2 = (ik >> 6) & 0x3f;
let i1 = ik & 0x3f;
let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
let p = DoubleDouble::quick_mult(t2, t1);
let mut q = q_2(ky);
q = DoubleDouble::quick_mult(p, q);
let mut ud: u64 = (im as u64).wrapping_shl(52);
if im == 0x7ff {
q.hi *= 2.0;
q.lo *= 2.0;
ud = (im.wrapping_sub(1) as u64).wrapping_shl(52);
}
q.hi *= f64::from_bits(ud);
q.lo *= f64::from_bits(ud);
q
}
#[cold]
fn exp10m1_accurate_tiny(x: f64) -> f64 {
let x2 = x * x;
let x4 = x2 * x2;
const Q: [u64; 25] = [
0x40026bb1bbb55516,
0xbcaf48ad494ea3e9,
0x40053524c73cea69,
0xbcae2bfab318d696,
0x4000470591de2ca4,
0x3ca823527cebf918,
0x3ff2bd7609fd98c4,
0x3c931ea51f6641df,
0x3fe1429ffd1d4d76,
0x3c7117195be7f232,
0x3fca7ed70847c8b6,
0xbc54260c5e23d0c8,
0x3fb16e4dfc333a87,
0xbc533fd284110905,
0x3f94116b05fdaa5d,
0xbc20721de44d79a8,
0x3f74897c45d93d43,
0x3f52ea52b2d182ac,
0x3f2facfd5d905b22,
0x3f084fe12df8bde3,
0x3ee1398ad75d01bf,
0x3eb6a9e96fbf6be7,
0x3e8bd456a29007c2,
0x3e6006cf8378cf9b,
0x3e368862b132b6e2,
];
let mut c13 = dd_fmla(f64::from_bits(Q[23]), x, f64::from_bits(Q[22])); let c11 = dd_fmla(f64::from_bits(Q[21]), x, f64::from_bits(Q[20])); c13 = dd_fmla(f64::from_bits(Q[24]), x2, c13); let mut p = DoubleDouble::from_exact_add(
f64::from_bits(Q[18]),
f_fmla(f64::from_bits(Q[19]), x, f_fmla(c11, x2, c13 * x4)),
);
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[17]), p.hi);
p.lo += p0.lo;
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[16]), p.hi);
p.lo += p0.lo;
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[15]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[13]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[11]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[9]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[7]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[5]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[3]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi);
p.lo += p0.lo + f64::from_bits(Q[1]);
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
p.to_f64()
}
#[cold]
fn exp10m1_accurate(x: f64) -> f64 {
let t = x.to_bits();
let ux = t;
let ax = ux & 0x7fffffffffffffffu64;
if ax <= 0x3fc0000000000000u64 {
return exp10m1_accurate_tiny(x);
}
let mut p = exp_2(x);
let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0);
p.lo += zf.lo;
p.hi = zf.hi;
p.to_f64()
}
#[inline]
fn exp10m1_fast_tiny(x: f64) -> Exp10m1 {
const P: [u64; 14] = [
0x40026bb1bbb55516,
0xbcaf48abcf79e094,
0x40053524c73cea69,
0xbcae1badf796d704,
0x4000470591de2ca4,
0x3ca7db8caacb2cea,
0x3ff2bd7609fd98ba,
0x3fe1429ffd1d4d98,
0x3fca7ed7084998e1,
0x3fb16e4dfc30944b,
0x3f94116ae4b57526,
0x3f74897c6a90f61c,
0x3f52ec689c32b3a0,
0x3f2faced20d698fe,
];
let x2 = x * x;
let x4 = x2 * x2;
let mut c9 = dd_fmla(f64::from_bits(P[12]), x, f64::from_bits(P[11])); let c7 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); let mut c5 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); c9 = dd_fmla(f64::from_bits(P[13]), x2, c9); c5 = dd_fmla(c7, x2, c5); c5 = dd_fmla(c9, x4, c5);
let mut p = DoubleDouble::from_exact_mult(c5, x);
let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[6]), p.hi);
p.lo += p0.lo;
p.hi = p0.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi);
p.lo += p1.lo + f64::from_bits(P[5]);
p.hi = p1.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi);
p.lo += p2.lo + f64::from_bits(P[3]);
p.hi = p2.hi;
p = DoubleDouble::quick_f64_mult(x, p);
let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi);
p.lo += p2.lo + f64::from_bits(P[1]);
p.hi = p2.hi;
p = DoubleDouble::quick_f64_mult(x, p);
Exp10m1 {
exp: p,
err: f64::from_bits(0x3bb0a00000000000) * p.hi, }
}
pub fn f_exp10m1(d: f64) -> f64 {
let mut x = d;
let t = x.to_bits();
let ux = t;
let ax = ux & 0x7fffffffffffffffu64;
if ux >= 0xc03041704c068ef0u64 {
if (ux >> 52) == 0xfff {
return if ux > 0xfff0000000000000u64 {
x + x
} else {
-1.0
};
}
return -1.0 + f64::from_bits(0x3c90000000000000);
} else if ax > 0x40734413509f79feu64 {
if (ux >> 52) == 0x7ff {
return x + x;
}
return f64::from_bits(0x7fefffffffffffff) * x;
} else if ax <= 0x3c90000000000000u64
{
return if ax <= 0x3970000000000000u64
{
if x == 0. {
return x;
}
if x.abs() == f64::from_bits(0x000086c73059343c) {
return dd_fmla(
-f64::copysign(f64::from_bits(0x1e60010000000000), x),
f64::from_bits(0x1e50000000000000),
f64::copysign(f64::from_bits(0x000136568740cb56), x),
);
}
if x.abs() == f64::from_bits(0x00013a7b70d0248c) {
return dd_fmla(
f64::copysign(f64::from_bits(0x1e5ffe0000000000), x),
f64::from_bits(0x1e50000000000000),
f64::copysign(f64::from_bits(0x0002d41f3b972fc7), x),
);
}
x *= f64::from_bits(0x4690000000000000);
let mut z = DoubleDouble::from_exact_mult(LN10H, x);
z.lo = dd_fmla(LN10L, x, z.lo);
let mut h2 = z.to_f64(); h2 *= f64::from_bits(0x3950000000000000);
let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi);
h += z.lo;
dyad_fmla(h, f64::from_bits(0x3950000000000000), h2)
} else {
const C2: f64 = f64::from_bits(0x40053524c73cea69); let mut z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
z.lo = dd_fmla(C2 * x, x, z.lo);
z.to_f64()
};
}
if ux << 15 == 0 {
let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() };
if x == i as f64 && 1 <= i && i <= 15 {
static EXP10_1_15: [u64; 16] = [
0x0000000000000000,
0x4022000000000000,
0x4058c00000000000,
0x408f380000000000,
0x40c3878000000000,
0x40f869f000000000,
0x412e847e00000000,
0x416312cfe0000000,
0x4197d783fc000000,
0x41cdcd64ff800000,
0x4202a05f1ff80000,
0x42374876e7ff0000,
0x426d1a94a1ffe000,
0x42a2309ce53ffe00,
0x42d6bcc41e8fffc0,
0x430c6bf52633fff8,
];
return f64::from_bits(EXP10_1_15[i as usize]);
}
}
let result = exp10m1_fast(x, ax <= 0x3fb0000000000000u64);
let left = result.exp.hi + (result.exp.lo - result.err);
let right = result.exp.hi + (result.exp.lo + result.err);
if left != right {
return exp10m1_accurate(x);
}
left
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_exp10m1() {
assert_eq!(f_exp10m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002364140972981833),
0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005443635762124408);
assert_eq!(99., f_exp10m1(2.0));
assert_eq!(315.22776601683796, f_exp10m1(2.5));
assert_eq!(-0.7056827241416722, f_exp10m1(-0.5311842449009418));
}
}