use crate::bits::{biased_exponent_f64, get_exponent_f64, mantissa_f64};
use crate::common::{dd_fmla, dyad_fmla, f_fmla};
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::logs::log2p1_dyadic_tables::{LOG2P1_F128_POLY, LOG2P1_INVERSE_2, LOG2P1_LOG_INV_2};
use crate::logs::log2p1_tables::{LOG2P1_EXACT, LOG2P1_INVERSE, LOG2P1_LOG_DD_INVERSE};
/// Computes log(1+z) - z as a double-double, for the small-|z| fast path.
///
/// The polynomial coefficients `PA` follow the Taylor series of log(1+z)
/// (1, -1/2, 1/3, ... with a slightly tuned tail). The linear term is
/// omitted — the caller adds `z` back exactly — and the dominant -z^2/2
/// term is combined with the cubic-and-up tail via an error-free add.
#[inline]
pub(crate) fn log_p_1a(z: f64) -> DoubleDouble {
    // z^2 as an exact double-double; when |z| < 2^-255 the low part of the
    // square is irrelevant, so skip the exact product entirely.
    let zz: DoubleDouble = if z.abs() >= f64::from_bits(0x3000000000000000) {
        DoubleDouble::from_exact_mult(z, z)
    } else {
        DoubleDouble::default()
    };
    let zz4 = zz.hi * zz.hi;
    const PA: [u64; 11] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555555,
        0xbfcffffffffffe5f,
        0x3fc999999999aa82,
        0xbfc555555583a8c8,
        0x3fc2492491c359e6,
        0xbfbffffc728edeea,
        0x3fbc71c961f34980,
        0xbfb9a82ac77c05f4,
        0x3fb74b40dd1707d3,
    ];
    // Estrin-style evaluation of coefficients PA[3..=10] in plain doubles:
    // pair adjacent terms, then combine pairs with z^2 and z^4.
    let t_9_10 = dd_fmla(f64::from_bits(PA[10]), z, f64::from_bits(PA[9]));
    let t_7_8 = dd_fmla(f64::from_bits(PA[8]), z, f64::from_bits(PA[7]));
    let t_5_6 = dd_fmla(f64::from_bits(PA[6]), z, f64::from_bits(PA[5]));
    let t_3_4 = dd_fmla(f64::from_bits(PA[4]), z, f64::from_bits(PA[3]));
    let t_7_10 = dd_fmla(t_9_10, zz.hi, t_7_8);
    let t_3_6 = dd_fmla(t_5_6, zz.hi, t_3_4);
    let mut poly = dd_fmla(t_7_10, zz4, t_3_6);
    // Fold in the 1/3 coefficient and scale by z^2: poly ~= sum_{k>=2} PA[k] z^k / z.
    poly = dd_fmla(poly, z, f64::from_bits(PA[2]));
    poly *= zz.hi;
    // Error-free combination of the dominant -z^2/2 with the tail, then
    // account for the low part of z^2.
    let mut res = DoubleDouble::from_exact_add(-0.5 * zz.hi, poly * z);
    res.lo += -0.5 * zz.lo;
    res
}
/// Degree-7 approximation of log(1+z) - z for the table-driven fast path.
///
/// Same scheme as `log_p_1a` but with fewer (tuned) coefficients: the
/// linear term is left to the caller, and the dominant -z^2/2 term is
/// applied with an error-free add.
#[inline]
fn p_1(z: f64) -> DoubleDouble {
    const C: [u64; 7] = [
        0x3ff0000000000000,
        0xbfe0000000000000,
        0x3fd5555555555550,
        0xbfcfffffffff572d,
        0x3fc999999a2d7868,
        0xbfc5555c0d31b08e,
        0x3fc2476b9058e396,
    ];
    // z^2 exactly, as hi + lo.
    let zz = DoubleDouble::from_exact_mult(z, z);
    // Evaluate C[3..=6] in plain doubles (pairs combined with z^2).
    let t56 = dd_fmla(f64::from_bits(C[6]), z, f64::from_bits(C[5]));
    let t34 = dd_fmla(f64::from_bits(C[4]), z, f64::from_bits(C[3]));
    let mut acc = dd_fmla(t56, zz.hi, t34);
    // Fold in the 1/3-ish coefficient and scale by z^2.
    acc = dd_fmla(acc, z, f64::from_bits(C[2]));
    acc *= zz.hi;
    // Error-free combination of -z^2/2 with the higher-order tail.
    let mut out = DoubleDouble::from_exact_add(-0.5 * zz.hi, acc * z);
    out.lo += -0.5 * zz.lo;
    out
}
/// Fast natural-log core: given `x = 2^e * m`, with `v_u` holding the
/// significand bits re-biased to [1, 2) by the caller, returns log(x) as a
/// double-double. (Name notwithstanding, the constants used below are the
/// split of ln 2, so this is the natural log; the base-2 conversion happens
/// in the callers.)
#[inline]
pub(crate) fn log_fast(e: i32, v_u: u64) -> DoubleDouble {
    // Full 53-bit integer significand m in [2^52, 2^53).
    let m: u64 = 0x10000000000000u64.wrapping_add(v_u & 0xfffffffffffff);
    // c = 1 when m >= sqrt(2)*2^52: fold the significand into the lower
    // half-octave and compensate in the exponent.
    let c: i32 = if m >= 0x16a09e667f3bcd { 1 } else { 0 };
    let e = e.wrapping_add(c);
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u32; 2] = [43, 44];
    // Table index from the top significand bits (shift depends on the fold).
    let i: i32 = (m >> CM[c as usize]) as i32;
    let y = f64::from_bits(v_u) * CY[c as usize];
    // OFFSET aligns the smallest possible index to 0 in the tables.
    const OFFSET: i32 = 362;
    // r ~ 1/y from the inverse table; (l1, l2) is the matching double-double
    // table value (stored (lo, hi) — note the swapped tuple fields below),
    // presumably log(1/r) — TODO confirm against the table generator.
    let r = f64::from_bits(LOG2P1_INVERSE[(i - OFFSET) as usize]);
    let log2_inv_dd = LOG2P1_LOG_DD_INVERSE[(i - OFFSET) as usize];
    let l1 = f64::from_bits(log2_inv_dd.1);
    let l2 = f64::from_bits(log2_inv_dd.0);
    // z = r*y - 1 is small, so log(y) = log(1/r) + log(1+z); p_1 supplies
    // log(1+z) - z.
    let z = dd_fmla(r, y, -1.0);
    let p = p_1(z);
    // ln 2 split into a truncated high part (trailing zero bits, so e*LOG2_H
    // is exact for moderate e) and its low correction.
    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa3800);
    const LOG2_L: f64 = f64::from_bits(0x3d2ef35793c76730);
    let ee = e as f64;
    // Accumulate log(x) = e*ln2 + l1 + l2 + z + p(z) from high to low:
    // the exact add captures the roundoff of the dominant terms into vl.lo.
    let mut vl = DoubleDouble::from_exact_add(f_fmla(ee, LOG2_H, l1), z);
    vl.lo = p.hi + (vl.lo + (l2 + p.lo));
    vl.lo = dd_fmla(ee, LOG2_L, vl.lo);
    vl
}
/// 1/ln(2) = log2(e) ~ 1.4426950408889634 as a double-double.
/// Constructor argument order appears to be (lo, hi) — the second field,
/// 0x3ff71547652b82fe, is the high part. Used to convert natural logs to
/// base-2 logs.
const INV_LOG2_DD: DoubleDouble = DoubleDouble::new(
    f64::from_bits(0x3c7777d0ffda0d24),
    f64::from_bits(0x3ff71547652b82fe),
);
/// Accurate evaluation of log2(1+x) for small |x|, computed as x * P(x).
///
/// The leading coefficients of P match the Taylor series of log2(1+x)/x:
/// P_ACC[0..=1] is 1/ln2 as a double-double (same bits as `INV_LOG2_DD`),
/// P_ACC[2..=3] is -1/(2 ln2), and so on.
///
/// Table layout: entries 0..=13 are seven double-double coefficient pairs
/// (hi at even index, lo at odd index); entries 14..=23 are ten plain
/// double coefficients for the high-order terms, where double precision
/// suffices.
fn log2p1_accurate_small(x: f64) -> f64 {
    static P_ACC: [u64; 24] = [
        0x3ff71547652b82fe,
        0x3c7777d0ffda0d24,
        0xbfe71547652b82fe,
        0xbc6777d0ffd9ddb8,
        0x3fdec709dc3a03fd,
        0x3c7d27f055481523,
        0xbfd71547652b82fe,
        0xbc5777d1456a14c4,
        0x3fd2776c50ef9bfe,
        0x3c7e4b2a04f81513,
        0xbfcec709dc3a03fd,
        0xbc6d2072e751087a,
        0x3fca61762a7aded9,
        0x3c5f90f4895378ac,
        0xbfc71547652b8301,
        0x3fc484b13d7c02ae,
        0xbfc2776c50ef7591,
        0x3fc0c9a84993cabb,
        0xbfbec709de7b1612,
        0x3fbc68f56ba73fd1,
        0xbfba616c83da87e7,
        0x3fb89f3042097218,
        0xbfb72b376930a3fa,
        0x3fb5d0211d5ab530,
    ];
    // Highest-order terms (coefficients 17..=23): plain-double Horner.
    let mut h = dd_fmla(f64::from_bits(P_ACC[23]), x, f64::from_bits(P_ACC[22]));
    for i in (11..=15).rev() {
        h = dd_fmla(h, x, f64::from_bits(P_ACC[(i + 6) as usize]));
    }
    let mut l = 0.;
    // Coefficients 14..=16: plain-double coefficients folded into a
    // double-double (h, l) accumulator via error-free adds.
    for i in (8..=10).rev() {
        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
        l = p.lo;
        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(i + 6) as usize]), p.hi);
        h = p.hi;
        l += p.lo;
    }
    // Coefficients 0..=13: double-double (hi, lo) pairs; the low half of
    // each coefficient is absorbed into the low accumulator.
    for i in (1..=7).rev() {
        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
        l = p.lo;
        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(2 * i - 2) as usize]), p.hi);
        h = p.hi;
        l += p.lo + f64::from_bits(P_ACC[(2 * i - 1) as usize]);
    }
    // Final multiply by x and collapse the double-double to one f64.
    let pz = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
    pz.to_f64()
}
/// Accurate path for very tiny |x| (callers guard |x| < 2^-105), where
/// log2(1+x) ~= x/ln2 to within the final rounding.
#[cold]
fn log2p1_accurate_tiny(x: f64) -> f64 {
    // One exceptional (subnormal-range) argument is hard to round; return
    // its precomputed result, with the FMA arranged to force the correct
    // rounding of the last bit (sign handled by the mirrored constants).
    if x.abs() == f64::from_bits(0x0002c316a14459d8) {
        return if x > 0. {
            dd_fmla(
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x0003fc1ce8b1583f),
            )
        } else {
            dd_fmla(
                f64::from_bits(0x9a70000000000000),
                f64::from_bits(0x1a70000000000000),
                f64::from_bits(0x8003fc1ce8b1583f),
            )
        };
    }
    // Scale x up by 2^106 (0x469... = 2^106) so the double-double product
    // x/ln2 is computed well inside the normal range, then scale the result
    // back down by 2^-106 (0x395...).
    let sx = x * f64::from_bits(0x4690000000000000);
    let mut zh = DoubleDouble::quick_f64_mult(sx, INV_LOG2_DD);
    // Recover the rounding residue of the scaled result and fold it back in
    // with a final fused multiply-add at the original scale.
    let res = zh.to_f64() * f64::from_bits(0x3950000000000000);
    zh.lo += dd_fmla(-res, f64::from_bits(0x4690000000000000), zh.hi);
    dyad_fmla(zh.lo, f64::from_bits(0x3950000000000000), res)
}
/// Fast path for log2(1+x). Returns (approximation, error bound); the
/// caller accepts the result only when both ends of the error interval
/// round to the same f64, otherwise it falls back to the accurate path.
#[inline]
fn log2p1_fast(x: f64, e: i32) -> (DoubleDouble, f64) {
    // |x| < 2^-5: 1+x would waste bits; evaluate a polynomial in x directly.
    if e < -5
    {
        // |x| < 2^-968: delegate straight to the accurate evaluators and
        // signal certainty with a zero error bound.
        // NOTE(review): with e <= -969 we have |x| < 2^-968 < 2^-105, so
        // the tiny branch always wins here; the small branch also serves
        // log2p1_accurate, which uses the same 2^-105 cutoff.
        if e <= -969 {
            let ax = x.abs();
            let result = if ax < f64::from_bits(0x3960000000000000) {
                log2p1_accurate_tiny(x)
            } else {
                log2p1_accurate_small(x)
            };
            return (DoubleDouble::new(0.0, result), 0.0);
        }
        // p ~ log(1+x) - x; add the leading x term back with an exact add.
        let mut p = log_p_1a(x);
        let p_lo = p.lo;
        p = DoubleDouble::from_exact_add(x, p.hi);
        p.lo += p_lo;
        // Convert the natural log to base 2.
        p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
        // Relative error bound (~2^-61.8) scaled by the result magnitude.
        return (p, f64::from_bits(0x3c1d400000000000) * p.hi);
    }
    // Generic path: 1+x exactly as a double-double, natural log of the high
    // part via the table-driven core, then a correction for the low part.
    let zx = DoubleDouble::from_full_exact_add(1.0, x);
    let mut v_u = zx.hi.to_bits();
    let e = ((v_u >> 52) as i32).wrapping_sub(0x3ff);
    // Force the exponent field to 0x3ff so log_fast sees a value in [1, 2).
    v_u = (0x3ffu64 << 52) | (v_u & 0xfffffffffffff);
    let mut p = log_fast(e, v_u);
    // log(hi + lo) ~= log(hi) + lo/hi. Skip the quotient when hi is huge and
    // lo comparatively tiny — presumably because it could not affect the
    // result (and might underflow); TODO confirm the exact rationale.
    let c = if zx.hi <= f64::from_bits(0x7fd0000000000000) || zx.lo.abs() >= 4.0 {
        zx.lo / zx.hi
    } else {
        0.
    };
    p.lo += c;
    p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
    // Fixed absolute error bound (~2^-67.9) for this path.
    (p, f64::from_bits(0x3bb2300000000000))
}
/// Horner evaluation of the 128-bit log(1+x) series tail: returns
/// x * (c0 + c1*x + ... + c12*x^12) with coefficients from
/// `LOG2P1_F128_POLY`.
fn log_dyadic_taylor_poly(x: DyadicFloat128) -> DyadicFloat128 {
    // Start from the highest-order coefficient and fold the rest in
    // descending order.
    let mut acc = LOG2P1_F128_POLY[12];
    for coeff in LOG2P1_F128_POLY[..12].iter().rev() {
        acc = x * acc + *coeff;
    }
    acc * x
}
/// 128-bit accurate log core. `d` is the value in 128-bit dyadic form; `x`
/// is the same value as an f64, used only to recover exponent/mantissa for
/// the argument reduction.
///
/// NOTE(review): despite the name, this returns the NATURAL log of `d`;
/// the conversion to base 2 (multiply by 1/ln2) is done by the caller
/// (see `log2p1_accurate`).
pub(crate) fn log2_dyadic(d: DyadicFloat128, x: f64) -> DyadicFloat128 {
    let biased_exp = biased_exponent_f64(x);
    let e = get_exponent_f64(x);
    let base_mant = mantissa_f64(x);
    // Add the implicit leading bit for normal numbers (biased_exp != 0).
    let mant = base_mant + if biased_exp != 0 { 1u64 << 52 } else { 0 };
    // Normalize subnormals: shift the mantissa to the top of the word and
    // move the shift into the exponent (lead == 11 for normal inputs).
    let lead = mant.leading_zeros();
    let kk = e - (if lead > 11 { lead - 12 } else { 0 }) as i64;
    let mut fe: i16 = kk as i16;
    let adjusted_mant = mant << lead;
    // Top 9 bits of the normalized mantissa index the inverse/log tables.
    let mut i: i16 = (adjusted_mant >> 55) as i16;
    // Fold mantissas above sqrt(2) (0xb504... ~ sqrt(2)*2^63) into the lower
    // half-octave, compensating in the exponent.
    if adjusted_mant > 0xb504f333f9de6484 {
        fe = fe.wrapping_add(1);
        i >>= 1;
    }
    // Strip the exponent: x now holds the reduced significand m near 1.
    let mut x = d;
    x.exponent = x.exponent.wrapping_sub(fe);
    // z = m * (1/m_i) - 1 is small; log(m) = log(1/m_i)-style table term
    // plus log(1+z) from the Taylor polynomial.
    let inverse_2 = LOG2P1_INVERSE_2[(i - 128) as usize];
    let mut z = x * inverse_2;
    const F128_MINUS_ONE: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Neg,
        exponent: -127,
        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
    };
    z = z + F128_MINUS_ONE;
    // ln(2) to 128-bit precision (mantissa 0xb17217f7... * 2^-128 ~ 0.6931).
    const LOG2: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -128,
        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
    };
    let r = LOG2.mul_int64(fe as i64);
    // log(d) = fe*ln2 + table term + log(1+z), summed smallest-first.
    let mut p = log_dyadic_taylor_poly(z);
    p = LOG2P1_LOG_INV_2[(i - 128) as usize] + p;
    p + r
}
/// Accurate (128-bit) fallback for log2(1+x), used when the fast path's
/// error interval straddles a rounding boundary.
#[cold]
fn log2p1_accurate(x: f64) -> f64 {
    let ax = x.abs();
    // |x| < 2^-5: dedicated small/tiny polynomial paths.
    if ax < f64::from_bits(0x3fa0000000000000) {
        return if ax < f64::from_bits(0x3960000000000000) {
            // |x| < 2^-105
            log2p1_accurate_tiny(x)
        } else {
            log2p1_accurate_small(x)
        };
    }
    // Compute 1+x exactly as a double-double, larger magnitude first.
    let dx = if x > 1.0 {
        DoubleDouble::from_exact_add(x, 1.0)
    } else {
        DoubleDouble::from_exact_add(1.0, x)
    };
    let mut t: u64 = x.to_bits();
    if dx.lo == 0. {
        // 1+x is exactly representable; if it is a power of two (all
        // mantissa bits zero) the result is exactly the integer exponent.
        t = dx.hi.to_bits();
        if (t.wrapping_shl(12)) == 0 {
            let e = ((t >> 52) as i32).wrapping_sub(0x3ff);
            return e as f64;
        }
    }
    if (t.wrapping_shl(12)) == 0 {
        // x itself is a power of two, x = 2^e with e >= 49:
        // log2(1+2^e) = e + log2(1+2^-e); the added 2^-48 presumably stands
        // in for that tiny positive term so the rounding goes the right way
        // — TODO confirm against the rounding analysis.
        let e: i32 = ((t >> 52) as i32).wrapping_sub(0x3ff);
        if e >= 49 {
            return e as f64 + f64::from_bits(0x3cf0000000000000); }
    }
    // 128-bit natural log of the high part (log2_dyadic returns ln despite
    // its name).
    let x_d = DyadicFloat128::new_from_f64(dx.hi);
    let mut y = log2_dyadic(x_d, dx.hi);
    // Low-part correction: ln(1 + lo/hi) ~= c - c^2/2 with c = lo/hi.
    let mut c = DyadicFloat128::from_div_f64(dx.lo, dx.hi);
    let mut bx = c * c;
    bx.exponent -= 1; // c^2 / 2
    bx.sign = DyadicSign::Neg; // -c^2/2 (c^2 is non-negative)
    c = c + bx;
    y = y + c;
    // Convert to base 2: LOG2_INV * 2^-12 = 1/ln2; the compensating
    // exponent shift is applied after the product.
    const LOG2_INV: DyadicFloat128 = DyadicFloat128 {
        sign: DyadicSign::Pos,
        exponent: -115,
        mantissa: 0xb8aa_3b29_5c17_f0bb_be87_fed0_691d_3e89_u128,
    };
    y = y * LOG2_INV;
    y.exponent -= 12;
    y.fast_as_f64()
}
/// Computes log2(1+x), aiming for correct rounding.
///
/// Special operands (NaN, infinities, zero, x <= -1) and exactly-integer
/// results are handled up front; then the fast double-double path runs,
/// and the 128-bit accurate path is taken only when the fast result's
/// error interval straddles a rounding boundary.
pub fn f_log2p1(x: f64) -> f64 {
    let bits = x.to_bits();
    let e = (((bits >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
    if e == 0x400 || x == 0. || x <= -1.0 {
        const MINUS_INF_BITS: u64 = 0xfffu64 << 52;
        // NaN or +Inf: x + x propagates the NaN payload / returns +Inf.
        if e == 0x400 && x.to_bits() != MINUS_INF_BITS {
            return x + x;
        }
        // Domain edge: log2(0) = -Inf at x == -1; NaN below the domain
        // (this branch also catches x == -Inf).
        if x <= -1.0
        {
            return if x < -1.0 {
                f64::NAN
            } else {
                f64::NEG_INFINITY
            };
        }
        // x is +/-0: log2(1) = 0; x + x keeps the sign of zero.
        return x + x;
    }
    // Table of arguments for which log2(1+x) is exactly the integer e+1.
    if (0..=52).contains(&e) {
        if x == f64::from_bits(LOG2P1_EXACT[e as usize]) {
            return (e + 1) as f64;
        }
    }
    // For x in (-1, -0.5], 1+x may be an exact power of two, giving an
    // exact negative integer result.
    if e == -1 && x < 0. {
        let w = (1.0 + x).to_bits();
        if w.wrapping_shl(12) == 0 {
            let k: i32 = ((w >> 52) as i32).wrapping_sub(0x3ff);
            return k as f64;
        }
    }
    // Fast path: accept when both ends of the error interval round to the
    // same double, otherwise fall back to the accurate path.
    let (p, err) = log2p1_fast(x, e);
    let lo_end = p.hi + (p.lo - err);
    let hi_end = p.hi + (p.lo + err);
    if lo_end == hi_end {
        return lo_end;
    }
    log2p1_accurate(x)
}
#[cfg(test)]
mod tests {
    use super::*;
    // Spot checks against precomputed correctly-rounded references: a
    // subnormal-range input, a small positive input, an input just above 1,
    // and a negative input close to -1.
    #[test]
    fn test_log2p1() {
        assert_eq!(f_log2p1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008344095884546873),
        0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012037985753337781);
        assert_eq!(f_log2p1(0.00006669877554532304), 0.00009622278377734607);
        assert_eq!(f_log2p1(1.00006669877554532304), 1.0000481121941047);
        assert_eq!(f_log2p1(-0.90006669877554532304), -3.322890675865049);
    }
}