use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1, ldexp};
use crate::exponents::{EXPM1_T0, EXPM1_T1};
use crate::polyeval::f_polyeval6;
use crate::pow_tables::{EXP_T1_2_DYADIC, EXP_T2_2_DYADIC, POW_INVERSE, POW_LOG_INV};
use crate::rounding::CpuRound;
use crate::rounding::CpuRoundTiesEven;
/// Polynomial part of log(1 + z) beyond the leading `z` term.
///
/// Approximates `log(1 + z) - z` as a double-double: the dominant word is
/// `-z^2/2` (passed second — `DoubleDouble::new` takes `(lo, hi)`), and the
/// low word folds `z^3 * P(z)` together with the low part of `-z^2/2`.
///
/// The coefficients track the Taylor series of log(1 + z):
/// P_1 ≈ [1/3, -1/4, 1/5, -1/6, 1/7, -1/8], with tiny adjustments
/// (presumably a minimax fit over the reduction interval).
#[inline(always)]
pub(crate) fn log_poly_1(z: f64) -> DoubleDouble {
    // Bit patterns of the f64 coefficients.
    const P_1: [u64; 6] = [
        0x3fd5555555555558, // ≈  1/3
        0xbfd0000000000003, // ≈ -1/4
        0x3fc999999981f535, // ≈  1/5
        0xbfc55555553d1eb4, // ≈ -1/6
        0x3fc2494526fd4a06, // ≈  1/7
        0xbfc0001f0c80e8ce, // ≈ -1/8
    ];
    // w = z^2 split exactly into hi + lo.
    let w = DoubleDouble::from_exact_mult(z, z);
    // Pair the coefficients with one fma each, then combine the pairs using
    // w.hi = z^2 (Estrin-style evaluation).
    let t = dd_fmla(f64::from_bits(P_1[5]), z, f64::from_bits(P_1[4]));
    let mut u = dd_fmla(f64::from_bits(P_1[3]), z, f64::from_bits(P_1[2]));
    let mut v = dd_fmla(f64::from_bits(P_1[1]), z, f64::from_bits(P_1[0]));
    u = dd_fmla(t, w.hi, u);
    v = dd_fmla(u, w.hi, v);
    // u ≈ z^2 * P(z); the final fma gives z^3 * P(z) and also absorbs the
    // low part of -z^2/2.
    u = v * w.hi;
    DoubleDouble::new(dd_fmla(u, z, -0.5 * w.lo), -0.5 * w.hi)
}
/// Computes log(x) in double-double precision for the pow/exp family.
///
/// Decomposes `x = 2^e * t` with `t` normalized near 1, then applies the
/// table-driven identity `log(x) = e*log(2) + log(1/r) + log(1 + z)` where
/// `r` comes from `POW_INVERSE` (so that `r*t` is close to 1) and
/// `z = r*t - 1` is small.
///
/// Returns `(v, flag)`. The flag is `true` when `e == 0` and the low word is
/// unusually large relative to the high word, in which case `v` has been
/// renormalized with an exact add (presumably a hint for the caller's
/// accuracy test — confirm at call sites).
#[inline]
pub(crate) fn pow_log_1(x: f64) -> (DoubleDouble, bool) {
    let x_u = x.to_bits();
    // Split the IEEE-754 encoding: 52-bit mantissa field, biased exponent.
    let mut m = x_u & 0xfffffffffffff;
    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;
    let t;
    if e != 0 {
        // Normal number: set the implicit bit and unbias the exponent.
        t = m | (0x3ffu64 << 52);
        m = m.wrapping_add(1u64 << 52);
        e -= 0x3ff;
    } else {
        // Subnormal: shift the mantissa up until its leading bit sits at
        // position 52, and compensate in the exponent.
        let k = m.leading_zeros() - 11;
        e = -0x3fei64 - k as i64;
        m = m.wrapping_shl(k);
        t = m | (0x3ffu64 << 52);
    }
    let mut t = f64::from_bits(t);
    // If m >= sqrt(2)*2^52, halve t and bump e so t stays close to 1.
    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u64; 2] = [44, 45];
    e = e.wrapping_add(c as i64);
    let be = e;
    // Table index from the top mantissa bits; the tables start at i = 181.
    let i = m >> CM[c];
    t *= CY[c];
    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
    // (hi, lo) words of log(1/r); the tuple is stored as (lo, hi).
    let l1 = f64::from_bits(POW_LOG_INV[(i - 181) as usize].1);
    let l2 = f64::from_bits(POW_LOG_INV[(i - 181) as usize].0);
    // z = r*t - 1, computed exactly thanks to the fma.
    let z = dd_fmla(r, t, -1.0);
    // log(2) as a double-double; the high word is truncated so that the
    // product with the integer-valued `be` stays exact (Cody–Waite style —
    // presumably; confirm against the table generator).
    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3d2ef35793c76730),
        f64::from_bits(0x3fe62e42fefa3800),
    );
    let th = dd_fmla(be as f64, LOG2_DD.hi, l1);
    let tl = dd_fmla(be as f64, LOG2_DD.lo, l2);
    // v ≈ e*log2 + log(1/r) + z, then add the polynomial tail of log(1+z).
    let mut v = DoubleDouble::f64_add(th, DoubleDouble::new(tl, z));
    let p = log_poly_1(z);
    v = DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo, p.hi));
    // For e == 0 an unusually large low word (> hi * 2^-24) means hi/lo
    // overlap: renormalize and signal the caller.
    if e == 0 && v.lo.abs() > (v.hi.abs()) * f64::from_bits(0x3e70000000000000) {
        v = DoubleDouble::from_exact_add(v.hi, v.lo);
        return (v, true);
    }
    (v, false)
}
/// Degree-4 polynomial approximation of exp(z) for the reduced argument.
///
/// Coefficients are 1, 1, 1/2 exactly, then ≈ 1/6 and ≈ 1/24 with small
/// adjustments. Evaluated as 1 + z*(1 + z*(1/2 + z*(c3 + z*c4))): the inner
/// terms use plain fma Horner, while the final product `z * h0` and the add
/// of the leading 1 are carried out in double-double to keep the low bits.
#[inline(always)]
fn exp_poly_1(z: f64) -> DoubleDouble {
    const Q_1: [u64; 5] = [
        0x3ff0000000000000, // 1
        0x3ff0000000000000, // 1
        0x3fe0000000000000, // 1/2
        0x3fc5555555997996, // ≈ 1/6
        0x3fa5555555849d8d, // ≈ 1/24
    ];
    // Plain-f64 Horner for the higher-order terms...
    let mut q = dd_fmla(f64::from_bits(Q_1[4]), z, f64::from_bits(Q_1[3]));
    q = dd_fmla(q, z, f64::from_bits(Q_1[2]));
    let h0 = dd_fmla(q, z, f64::from_bits(Q_1[1]));
    // ...then an exact product z*h0 and a double-double add of the leading 1.
    let v1 = DoubleDouble::from_exact_mult(z, h0);
    DoubleDouble::f64_add(f64::from_bits(Q_1[0]), v1)
}
/// Computes `exp(r) * s` in double-double precision with range guards.
///
/// `s` is a caller-supplied sign/scale factor multiplied into the result,
/// including the escape values of the out-of-range paths.
///
/// Guard thresholds (f64 bit patterns), with RHO0 < RHO1 < 0 < RHO2 < RHO3:
/// - `r.hi > RHO3` (or NaN): returns `DBL_MAX * s` in both words, so the
///   final rounding yields the appropriate overflow result;
/// - `RHO2 < r.hi <= RHO3`: NaN pair — a marker that this fast path cannot
///   decide (presumably the caller falls back to a slower path; confirm);
/// - `r.hi < RHO0`: returns smallest-subnormal `* (0.5 * s)` to force a
///   correctly signed underflowed result;
/// - `RHO0 <= r.hi < RHO1`: NaN-pair marker, as above.
///
/// In-range path: `k = round(r.hi * 2^12/log 2)`; the reduced argument is
/// `z = r - k*log(2)/2^12` (Cody–Waite with LOG2H/LOG2L). `k` is split into
/// a binary exponent (`k >> 12`) and two 6-bit table indices, so that
/// `exp(r) = 2^(k>>12) * T0[i2] * T1[i1] * exp(z)`.
#[inline]
pub(crate) fn pow_exp_1(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);
    // `!(<=)` instead of `>` so that a NaN input also takes this branch.
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    if r.hi < RHO1 {
        return if r.hi < RHO0 {
            // 0x...0001 is the smallest positive subnormal; scaling by 0.5*s
            // produces the correctly signed tiny/underflowed value.
            DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s)
        } else {
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    // 2^12 / log(2): granularity of the argument reduction.
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe);
    let k = (r.hi * INVLOG2).cpu_round();
    // log(2)/2^12 split into head and tail (Cody–Waite).
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
    let zh = dd_fmla(LOG2H, -k, r.hi);
    let zl = dd_fmla(LOG2L, -k, r.lo);
    // SAFETY-adjacent: the RHO guards above bound |r.hi|, so k fits in i64.
    let bk = unsafe { k.to_int_unchecked::<i64>() };
    let mk = (bk >> 12) + 0x3ff; // biased exponent of the 2^(k>>12) factor
    let i2 = (bk >> 6) & 0x3f; // table index: middle 6 bits of k
    let i1 = bk & 0x3f; // table index: low 6 bits of k
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly_1(zh + zl);
    de = DoubleDouble::quick_mult(de, q);
    // Assemble 2^(mk - 1023) directly in the exponent field, pre-scaled by s.
    let mut du = (mk as u64).wrapping_shl(52);
    du = (f64::from_bits(du) * s).to_bits();
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
/// Fast table-driven exp of a double-double argument, without range checks.
///
/// Unlike `pow_exp_1` there are no RHO guards here, so the caller must keep
/// `r` inside the non-overflow/underflow range (NOTE(review): confirm at
/// call sites).
///
/// Reduction: `k = round(r.hi * 2^12/log 2)`, `z = r - k*log(2)/2^12` kept
/// as a double-double and renormalized exactly; `exp(z)` is approximated as
/// `exp_poly_1(z.hi) * (1 + z.lo)`, i.e. first order in the tiny low word.
#[inline]
pub(crate) fn exp_dd_fast(r: DoubleDouble) -> DoubleDouble {
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); // 2^12/log(2)
    let k = (r.hi * INVLOG2).cpu_round();
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef); // log(2)/2^12 head
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f); // log(2)/2^12 tail
    let mut z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    z = DoubleDouble::from_exact_add(z.hi, z.lo);
    // NOTE(review): assumes the caller bounds r so that k fits in i64.
    let bk = unsafe { k.to_int_unchecked::<i64>() };
    let mk = (bk >> 12) + 0x3ff; // biased exponent of the 2^(k>>12) factor
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q_hi = exp_poly_1(z.hi);
    // exp(z.hi + z.lo) ≈ exp(z.hi) * (1 + z.lo) for |z.lo| << 1.
    let q_lo = DoubleDouble::from_exact_add(1., z.lo);
    let q = DoubleDouble::quick_mult(q_hi, q_lo);
    de = DoubleDouble::quick_mult(de, q);
    // 2^(mk - 1023) assembled directly in the exponent field.
    let du = (mk as u64).wrapping_shl(52);
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
/// Computes `exp(r) * s` like `pow_exp_1`, with a more careful argument
/// reduction: `z = r - k*log(2)/2^12` is formed as a double-double via
/// `mul_f64_add`, then collapsed with `to_f64()` before the polynomial.
///
/// Range guards and escape values are identical to `pow_exp_1` — see the
/// documentation there for the meaning of RHO0..RHO3 and the NaN-pair
/// marker.
#[inline]
pub(crate) fn pow_exp_dd(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);
    // `!(<=)` instead of `>` so that a NaN input also takes this branch.
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            // Certain overflow: DBL_MAX * s in both words.
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    if r.hi < RHO1 {
        return if r.hi < RHO0 {
            // Certain underflow: correctly signed tiny result.
            DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s)
        } else {
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); // 2^12/log(2)
    let k = (r.hi * INVLOG2).cpu_round();
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef); // log(2)/2^12 head
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f); // log(2)/2^12 tail
    // z = r - k*log(2)/2^12, kept in double-double.
    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    // The RHO guards bound |r.hi|, so k fits in i64.
    let bk = unsafe { k.to_int_unchecked::<i64>() };
    let mk = (bk >> 12) + 0x3ff; // biased exponent of the 2^(k>>12) factor
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;
    let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
    let mut de = DoubleDouble::quick_mult(t1, t0);
    let q = exp_poly_1(z.to_f64());
    de = DoubleDouble::quick_mult(de, q);
    // Assemble 2^(mk - 1023) in the exponent field, pre-scaled by s.
    let mut du = (mk as u64).wrapping_shl(52);
    du = (f64::from_bits(du) * s).to_bits();
    de.hi *= f64::from_bits(du);
    de.lo *= f64::from_bits(du);
    de
}
/// Fast polynomial for expm1 on the reduced double-double argument `z`.
///
/// `p` approximates (exp(z) - 1 - z)/z^2 on the reduction interval: the
/// leading coefficients are ≈ 1/2, 1/6, 1/24, 1/120, 1/720, while the last
/// coefficient is a range-specific fitted term, not a Taylor coefficient.
/// Then `expm1_hi ≈ z.hi + z.hi^2 * p`, and the low word enters to first
/// order as `z.lo * (1 + expm1_hi) ≈ z.lo * exp(z)`.
#[inline(always)]
pub(crate) fn expm1_poly_fast(z: DoubleDouble) -> DoubleDouble {
    let p = f_polyeval6(
        z.hi,
        f64::from_bits(0x3fe0000000000000), // 1/2
        f64::from_bits(0x3fc5555555555555), // 1/6
        f64::from_bits(0x3fa55555555553de), // ≈ 1/24
        f64::from_bits(0x3f81111144995a9a), // ≈ 1/120
        f64::from_bits(0x3f56c241f9a791c5), // ≈ 1/720
        f64::from_bits(0xbfad9209c6d8b9e1), // fitted correction term
    );
    // px = z.hi^2 * p, with the first product split exactly.
    let px = DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_mult(z.hi, p), z.hi);
    let expm1_hi = DoubleDouble::f64_add(z.hi, px);
    // Fold in the low word: expm1(z) ≈ expm1(z.hi) + z.lo * (1 + expm1(z.hi)).
    let mut lowest_part = DoubleDouble::quick_mult_f64(expm1_hi, z.lo);
    lowest_part = DoubleDouble::full_add_f64(lowest_part, z.lo);
    DoubleDouble::quick_dd_add(expm1_hi, lowest_part)
}
/// expm1(z) for tiny arguments, evaluated fully in double-double precision.
///
/// `Q[k]` stores 1/(k+1)! as a (lo, hi) bit pair, so the Horner recurrence
/// builds expm1(z)/z = 1 + z/2 + z^2/6 + ... ; the final multiplication by
/// `z` gives expm1(z) without the catastrophic cancellation of exp(z) - 1.
#[inline(always)]
pub(crate) fn expm1_poly_dd_tiny(z: DoubleDouble) -> DoubleDouble {
    // (lo, hi) bit pairs of ≈ 1/(k+1)!; the first two are exact, so lo = 0.
    const Q: [(u64, u64); 9] = [
        (0x0000000000000000, 0x3ff0000000000000), // 1
        (0x0000000000000000, 0x3fe0000000000000), // 1/2
        (0x3c6555564150ff16, 0x3fc5555555555555), // ≈ 1/6
        (0x3c4586275c26f8a5, 0x3fa5555555555555), // ≈ 1/24
        (0xbc19e6193ac658a6, 0x3f81111111111111), // ≈ 1/120
        (0xbbf025e72dc21051, 0x3f56c16c16c1500a), // ≈ 1/720
        (0x3bc2d641a7b7b9b8, 0x3f2a01a01a07dc46), // ≈ 1/5040
        (0xbb42cc8aaeeb3d00, 0x3efa01a29fef3e6f), // ≈ 1/40320
        (0x3b52b1589125ce82, 0x3ec71db6af553255), // ≈ 1/362880
    ];
    // Renormalize so hi/lo do not overlap before the long Horner chain.
    let z = DoubleDouble::from_exact_add(z.hi, z.lo);
    let mut d = DoubleDouble::quick_mul_add(
        z,
        DoubleDouble::from_bit_pair(Q[8]),
        DoubleDouble::from_bit_pair(Q[7]),
    );
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[6]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[5]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[4]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[3]));
    d = DoubleDouble::quick_mul_add(z, d, DoubleDouble::from_bit_pair(Q[2]));
    // The 1/2 and 1 coefficients are exact f64s, so the cheaper f64 variant
    // of the multiply-add suffices for the last two steps.
    d = DoubleDouble::quick_mul_add_f64(z, d, f64::from_bits(0x3fe0000000000000));
    d = DoubleDouble::quick_mul_add_f64(z, d, f64::from_bits(0x3ff0000000000000));
    DoubleDouble::quick_mult(d, z)
}
/// Computes expm1(r) in double-double precision; `s` scales the escape
/// values of the extreme branches, as in `pow_exp_1`.
///
/// Out-of-range handling (same RHO thresholds as `pow_exp_1`):
/// - `r.hi > RHO3` (or NaN): DBL_MAX * s in both words (overflow);
/// - `RHO2 < r.hi <= RHO3`: NaN pair — "fast path cannot decide" marker;
/// - `r.hi < RHO0`: exp(r)*s underflows, so the result is (tiny) - 1;
/// - `RHO0 <= r.hi < RHO1`: returns exactly -1 as a double-double.
///
/// Small arguments (|r.hi| <= 2^-7 by exponent-field comparison):
/// - |r.hi| < 2^-104: expm1(r) == r at this precision — return r;
/// - otherwise: dedicated tiny-argument polynomial, no table reduction.
///
/// Main path: 2^12-way reduction (k rounded ties-to-even) with the EXPM1
/// tables; `de = tbh + tbh*q ≈ exp(r) / 2^ie`. The trailing `-1` is injected
/// before scaling as `off = -2^(-ie)` so that `ldexp(de + off, ie)` equals
/// exp(r) - 1.
#[inline]
pub(crate) fn pow_expm1_1(r: DoubleDouble, s: f64) -> DoubleDouble {
    const RHO0: f64 = f64::from_bits(0xc0874910ee4e8a27);
    const RHO1: f64 = f64::from_bits(0xc08483b8cca421af);
    const RHO2: f64 = f64::from_bits(0x40862e42e709a95b);
    const RHO3: f64 = f64::from_bits(0x40862e4316ea5df9);
    // `!(<=)` instead of `>` so a NaN input also takes this branch.
    #[allow(clippy::neg_cmp_op_on_partial_ord)]
    if !(r.hi <= RHO2) {
        return if r.hi > RHO3 {
            DoubleDouble::new(
                f64::from_bits(0x7fefffffffffffff) * s,
                f64::from_bits(0x7fefffffffffffff) * s,
            )
        } else {
            DoubleDouble::new(f64::NAN, f64::NAN)
        };
    }
    if r.hi < RHO1 {
        if r.hi < RHO0 {
            // exp(r)*s is a signed tiny value; expm1 = tiny - 1.
            return DoubleDouble::full_add_f64(
                DoubleDouble::new(f64::from_bits(0x0000000000000001) * (0.5 * s), 0.0 * s),
                -1.0,
            );
        } else {
            // exp(r) is negligible against 1: expm1(r) = -1 exactly
            // (DoubleDouble::new takes (lo, hi)).
            return DoubleDouble::new(0., -1.);
        };
    }
    let ax = r.hi.to_bits() & 0x7fffffffffffffffu64; // |r.hi| bit pattern
    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef); // log(2)/2^12 head
    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f); // log(2)/2^12 tail
    if ax <= 0x3f80000000000000 {
        // |r.hi| <= 2^-7: the table path loses relative accuracy near 0.
        if ax < 0x3970000000000000 {
            // |r.hi| < 2^-104: expm1(r) rounds to r itself.
            return r;
        }
        let d = expm1_poly_dd_tiny(r);
        return d;
    }
    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); // 2^12/log(2)
    let k = (r.hi * INVLOG2).cpu_round_ties_even();
    // z = r - k*log(2)/2^12, kept in double-double.
    let z = DoubleDouble::mul_f64_add(DoubleDouble::new(LOG2L, LOG2H), -k, r);
    // The RHO guards bound |r.hi|, so k fits in i64.
    let bk = unsafe { k.to_int_unchecked::<i64>() };
    let mk = (bk >> 12) + 0x3ff; // biased exponent of the 2^(k>>12) factor
    let i2 = (bk >> 6) & 0x3f;
    let i1 = bk & 0x3f;
    let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i2 as usize]);
    let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
    let tbh = DoubleDouble::quick_mult(t1, t0);
    let mut de = tbh;
    let q = expm1_poly_fast(z);
    // de = tbh * (1 + q): table factor times exp(z).
    de = DoubleDouble::quick_mult(de, q);
    de = DoubleDouble::add(tbh, de);
    let ie = mk - 0x3ff; // unbiased scale: exp(r) ≈ 2^ie * de
    // off = -2^(-ie): (2048 + 1023 - ie) << 52 sets the sign bit and leaves
    // an exponent field of 1023 - ie.
    let off: f64 = f64::from_bits((2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64);
    let e: f64;
    if ie < 53 {
        // Add the -2^(-ie) offset exactly; operand order depends on the
        // expected magnitudes in this range.
        let fhz = DoubleDouble::from_exact_add(off, de.hi);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else if ie < 104 {
        // de.hi dominates; off still contributes to the low word.
        let fhz = DoubleDouble::from_exact_add(de.hi, off);
        de.hi = fhz.hi;
        e = fhz.lo;
    } else {
        // 2^(-ie) is below even the low word: the -1 is negligible.
        e = 0.;
    }
    de.lo += e;
    // Collapse to a single f64 and apply the 2^ie scale; lo is consumed.
    de.hi = ldexp(de.to_f64(), ie as i32);
    de.lo = 0.;
    de
}
/// Degree-7 polynomial approximation of exp for the reduced argument,
/// evaluated with Horner's scheme in 128-bit dyadic arithmetic.
fn exp_dyadic_poly(x: DyadicFloat128) -> DyadicFloat128 {
    // Coefficients c0..c7, tracking 1/k! with low-order corrections baked in.
    const Q_2: [DyadicFloat128; 8] = [
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffd0_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -127,
            mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0088_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -128,
            mantissa: 0x8000_0000_0000_0000_0000_000c_06f3_cd29_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -130,
            mantissa: 0xaaaa_aaaa_aaaa_aaaa_aaaa_aa6a_1e07_76ae_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -132,
            mantissa: 0xaaaa_aaaa_aaaa_aaa3_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -134,
            mantissa: 0x8888_8888_8888_8897_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -137,
            mantissa: 0xb60b_60b9_3214_6a54_0000_0000_0000_0000_u128,
        },
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -140,
            mantissa: 0xd00d_00cd_9841_6862_u128,
        },
    ];
    // Horner: seed with the top coefficient, then fold the remaining
    // coefficients in from highest degree down to the constant term.
    Q_2[..7]
        .iter()
        .rev()
        .fold(Q_2[7], |acc, &coeff| x * acc + coeff)
}
/// exp(x) for a 128-bit dyadic float — the accurate/fallback path.
///
/// `ex = x.exponent + 127` is roughly log2(|x|); for ex >= 10 (|x| on the
/// order of 2^10 or larger) the true exp is far outside the f64 range, so a
/// saturated sentinel is returned: exponent 1025 (huge) for positive x,
/// -1076 (tiny) for negative x, keeping x's mantissa so the value is
/// nonzero. The caller's final conversion then produces the appropriate
/// overflow/underflow.
///
/// Otherwise: k = trunc(x * 2^12/log 2), y = x - k*log(2)/2^12, and
/// exp(x) = 2^(k>>12) * T1[i2] * T2[i1] * P(y) using two 64-entry dyadic
/// tables and the degree-7 polynomial `exp_dyadic_poly`.
#[inline]
pub(crate) fn exp_dyadic(x: DyadicFloat128) -> DyadicFloat128 {
    let ex = x.exponent + 127;
    if ex >= 10 {
        return DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: if x.sign == DyadicSign::Neg {
                -1076
            } else {
                1025
            },
            mantissa: x.mantissa,
        };
    }
    // 2^12 / log(2) in dyadic form.
    const LOG2_INV: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -115,
        mantissa: 0xb8aa_3b29_5c17_f0bc_0000_0000_0000_0000_u128,
    };
    // log(2) with a full 128-bit mantissa.
    const LOG2: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
    };
    let mut bk = x * LOG2_INV;
    let k = bk.trunc_to_i64();
    // bk := -k * log(2) / 2^12  (exponent -= 12 divides by 2^12).
    bk = LOG2.mul_int64(k);
    bk.exponent -= 12;
    bk.sign = bk.sign.negate();
    // Reduced argument y = x - k*log(2)/2^12.
    let y = x + bk;
    let bm = k >> 12; // power-of-two part of the reduction
    let i2 = (k >> 6) & 0x3f; // middle 6 bits of k -> first table
    let i1 = k & 0x3f; // low 6 bits of k -> second table
    let mut r = exp_dyadic_poly(y);
    r = EXP_T1_2_DYADIC[i2 as usize] * r;
    r = EXP_T2_2_DYADIC[i1 as usize] * r;
    // Apply 2^bm by adjusting the dyadic exponent directly.
    r.exponent += bm as i16;
    r
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::f_expm1;

    /// `exp_dyadic` (the 128-bit path) must round-trip to the correctly
    /// rounded double for a simple input: exp(2.5) = 12.18249396070347...
    #[test]
    fn test_exp_dyadic() {
        let k = DyadicFloat128::new_from_f64(2.5);
        assert_eq!(exp_dyadic(k).fast_as_f64(), 12.182493960703473);
    }

    /// `pow_expm1_1` must agree with the scalar `f_expm1` entry point.
    /// (The previous version only printed both values without asserting.)
    #[test]
    fn test_pow_expm1_1() {
        let x = 2.543543543543;
        let k = pow_expm1_1(DoubleDouble::new(0., x), 1.);
        let expected = f_expm1(x);
        // Both paths target sub-ulp accuracy; a 1e-13 relative tolerance
        // leaves ample slack while still catching real regressions.
        assert!(
            (k.to_f64() - expected).abs() <= expected.abs() * 1e-13,
            "pow_expm1_1 = {}, f_expm1 = {}",
            k.to_f64(),
            expected
        );
    }
}