use crate::common::f_fmla;
/// Double-double value: a pair of `f64` words whose sum `hi + lo` is the
/// represented number (this is what `to_f64` evaluates).
#[derive(Copy, Clone, Default, Debug)]
pub(crate) struct Dekker {
/// Low-order word; holds the correction term produced by the arithmetic helpers.
pub(crate) lo: f64,
/// High-order word; holds the leading part of the value.
pub(crate) hi: f64,
}
// Double-double ("Dekker") arithmetic helpers.
//
// Every constructor takes its words in `(lo, hi)` order. Helpers named
// `from_exact_*` are error-free transformations: `hi` is the rounded result
// of the operation and `lo` is the rounding error of that operation (subject
// to the preconditions noted on each helper).
impl Dekker {
    /// Builds a value from raw IEEE-754 bit patterns given as `(lo_bits, hi_bits)`.
    #[inline]
    pub(crate) const fn from_bit_pair(pair: (u64, u64)) -> Self {
        Self {
            lo: f64::from_bits(pair.0),
            hi: f64::from_bits(pair.1),
        }
    }

    /// Builds a value from its low and high words (note the `lo`, `hi` order).
    #[inline]
    pub(crate) const fn new(lo: f64, hi: f64) -> Self {
        Dekker { lo, hi }
    }

    /// Veltkamp split of `a` into two words with `hi + lo == a`, using the
    /// splitting constant `2^27 + 1`.
    ///
    /// The multiplication by the splitting constant can overflow for inputs of
    /// very large magnitude.
    // Only used by the non-FMA multiplication path, hence the dead-code allowance.
    #[allow(dead_code)]
    #[inline]
    pub(crate) const fn split(a: f64) -> Dekker {
        const CN: f64 = (1 << 27) as f64;
        const C: f64 = CN + 1.0;
        let t1 = C * a;
        let t2 = a - t1;
        let r_hi = t1 + t2;
        let r_lo = a - r_hi;
        Dekker::new(r_lo, r_hi)
    }

    /// Dekker product of `a * b` without FMA.
    ///
    /// `asz` must be `Dekker::split(a)`; `b` is split here. `hi` is the rounded
    /// product and `lo` accumulates the partial products minus `hi`.
    // Only used when the FMA path is compiled out, hence the dead-code allowance.
    #[allow(dead_code)]
    #[inline]
    fn from_exact_mult_impl_non_fma(asz: Dekker, a: f64, b: f64) -> Self {
        let bs = Dekker::split(b);
        let r_hi = a * b;
        let t1 = asz.hi * bs.hi - r_hi;
        let t2 = asz.hi * bs.lo + t1;
        let t3 = asz.lo * bs.hi + t2;
        let r_lo = asz.lo * bs.lo + t3;
        Dekker::new(r_lo, r_hi)
    }

    /// Fast two-sum of `a + b`.
    ///
    /// `lo` is the exact rounding error only under the usual Fast2Sum
    /// precondition that `a` is not smaller in magnitude than `b`; use
    /// [`Dekker::from_full_exact_add`] when the ordering is unknown.
    #[inline]
    pub(crate) const fn from_exact_add(a: f64, b: f64) -> Dekker {
        let r_hi = a + b;
        let t = r_hi - a;
        let r_lo = b - t;
        Dekker::new(r_lo, r_hi)
    }

    /// Fast two-sum variant for `a - b`; same magnitude precondition as
    /// [`Dekker::from_exact_add`].
    #[inline]
    pub(crate) const fn from_exact_sub(a: f64, b: f64) -> Dekker {
        let r_hi = a - b;
        let t = a - r_hi;
        let r_lo = t - b;
        Dekker::new(r_lo, r_hi)
    }

    /// Branch-free two-sum of `a + b` that does not depend on the relative
    /// magnitude of the operands.
    #[inline]
    pub(crate) const fn from_full_exact_add(a: f64, b: f64) -> Dekker {
        let r_hi = a + b;
        let t1 = r_hi - a;
        let t2 = r_hi - t1;
        let t3 = b - t1;
        let t4 = a - t2;
        let r_lo = t3 + t4;
        Dekker::new(r_lo, r_hi)
    }

    /// Adds two double-double values.
    ///
    /// The high words are combined with a two-sum; both low words are folded
    /// into the resulting error term. The result is not renormalised.
    #[inline]
    pub(crate) fn add(a: Dekker, b: Dekker) -> Dekker {
        let s = a.hi + b.hi;
        let d = s - a.hi;
        let l = ((b.hi - d) + (a.hi + (d - s))) + (a.lo + b.lo);
        Dekker::new(l, s)
    }

    /// Divides `a` by `b`.
    ///
    /// `hi` is `a.hi * (1 / b.hi)`; `lo` is one correction step computed from
    /// the residual `a - b * hi`, scaled by the same reciprocal.
    #[inline]
    pub(crate) fn div(a: Dekker, b: Dekker) -> Dekker {
        let q = 1.0 / b.hi;
        let r_hi = a.hi * q;
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            // Residuals obtained directly through fused multiply-adds.
            let e_hi = f_fmla(b.hi, -r_hi, a.hi);
            let e_lo = f_fmla(b.lo, -r_hi, a.lo);
            let r_lo = q * (e_hi + e_lo);
            Dekker::new(r_lo, r_hi)
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            // Without FMA the products are formed as exact hi/lo pairs first.
            let b_hi_r_hi = Dekker::from_exact_mult(b.hi, -r_hi);
            let b_lo_r_hi = Dekker::from_exact_mult(b.lo, -r_hi);
            let e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
            let e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
            let r_lo = q * (e_hi + e_lo);
            Dekker::new(r_lo, r_hi)
        }
    }

    /// Error-free product of two `f64` values: `hi` is the rounded product and
    /// `lo` its rounding error.
    ///
    /// Uses a single FMA where the target provides one and the Veltkamp/Dekker
    /// splitting scheme otherwise.
    #[inline]
    pub(crate) fn from_exact_mult(a: f64, b: f64) -> Self {
        #[cfg(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        ))]
        {
            let r_hi = a * b;
            let r_lo = f_fmla(a, b, -r_hi);
            Dekker::new(r_lo, r_hi)
        }
        #[cfg(not(any(
            all(
                any(target_arch = "x86", target_arch = "x86_64"),
                target_feature = "fma"
            ),
            all(target_arch = "aarch64", target_feature = "neon")
        )))]
        {
            let splat = Dekker::split(a);
            Dekker::from_exact_mult_impl_non_fma(splat, a, b)
        }
    }

    /// Product of two double-double values without a final renormalisation.
    ///
    /// `hi` is the rounded `a.hi * b.hi`; `lo` collects its rounding error plus
    /// both cross terms. The `a.lo * b.lo` term is dropped.
    #[inline]
    pub(crate) fn quick_mult(a: Dekker, b: Dekker) -> Self {
        let mut r = Dekker::from_exact_mult(a.hi, b.hi);
        let t1 = f_fmla(a.hi, b.lo, r.lo);
        let t2 = f_fmla(a.lo, b.hi, t1);
        r.lo = t2;
        r
    }

    /// Product of two double-double values, renormalised so that `hi` absorbs
    /// as much of the low-order sum as it can.
    ///
    /// The `a.lo * b.lo` term is dropped.
    #[inline]
    pub(crate) fn mult(a: Dekker, b: Dekker) -> Self {
        // The leading product goes through `from_exact_mult` so its rounding
        // error is also captured on targets where the FMA path is compiled
        // out. NOTE(review): `f_fmla` is defined elsewhere; the cfg gating in
        // `from_exact_mult` suggests it is not fused on such targets — confirm.
        let p = Dekker::from_exact_mult(a.hi, b.hi);
        let cross = b.lo * a.hi + b.hi * a.lo;
        let tail = p.lo + cross;
        let ch = p.hi + tail;
        let l = (p.hi - ch) + tail;
        Self { lo: l, hi: ch }
    }

    /// Product of a double-double value and an `f64`, renormalised like
    /// [`Dekker::mult`].
    #[inline]
    pub(crate) fn mult_f64(a: Dekker, b: f64) -> Self {
        // See `mult` for why the exact product helper is used here.
        let p = Dekker::from_exact_mult(a.hi, b);
        let tail = p.lo + b * a.lo;
        let ch = p.hi + tail;
        let l = (p.hi - ch) + tail;
        Dekker::new(l, ch)
    }

    /// Product of an `f64` and a double-double value without renormalisation.
    #[inline]
    pub(crate) fn f64_mult(a: f64, b: Dekker) -> Dekker {
        let mut p = Dekker::from_exact_mult(a, b.hi);
        p.lo = f_fmla(a, b.lo, p.lo);
        p
    }

    /// Product of a double-double value and an `f64` without renormalisation:
    /// `hi` is the rounded `a.hi * b`, `lo` is `a.lo * b` plus the rounding
    /// error of `hi`.
    #[inline]
    pub(crate) fn quick_mult_f64(a: Dekker, b: f64) -> Self {
        // See `mult` for why the exact product helper is used here.
        let p = Dekker::from_exact_mult(a.hi, b);
        let l = b * a.lo + p.lo;
        Self { lo: l, hi: p.hi }
    }

    /// Adds an `f64` to a double-double value.
    ///
    /// Uses [`Dekker::from_exact_add`] with `a.hi` as the first operand, so
    /// that helper's magnitude precondition applies to `(a.hi, b)`.
    #[inline]
    pub(crate) fn add_f64(a: Dekker, b: f64) -> Self {
        let t = Dekker::from_exact_add(a.hi, b);
        let l = a.lo + t.lo;
        Self { lo: l, hi: t.hi }
    }

    /// Adds a double-double value to an `f64`.
    ///
    /// Uses [`Dekker::from_exact_add`] with `b` as the first operand, so that
    /// helper's magnitude precondition applies to `(b, a.hi)`.
    #[inline]
    pub(crate) fn f64_add(b: f64, a: Dekker) -> Self {
        let t = Dekker::from_exact_add(b, a.hi);
        let l = a.lo + t.lo;
        Self { lo: l, hi: t.hi }
    }

    /// Collapses the pair to a single `f64` by evaluating `lo + hi`.
    #[inline]
    pub(crate) const fn to_f64(self) -> f64 {
        self.lo + self.hi
    }
}