use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::polyeval::f_polyeval6;
#[cold]
fn sqrt1pm1_near_zero_hard(x: f64) -> f64 {
const C: [(u64, u64); 10] = [
(0x3af2bd7b2a000000, 0x3fe0000000000000),
(0xbb1e743d7fe00000, 0xbfc0000000000000),
(0xbc18d267496ae7f8, 0x3fb0000000000000),
(0x3c2d7a2a887e1af8, 0xbfa4000000000000),
(0xbc3a4965117e40c8, 0x3f9c00000000150b),
(0x3c3dc057cb5bf82c, 0xbf9500000000209e),
(0x3c02d0756979aa48, 0x3f907ffff9c1db3d),
(0xbbe30b3d9b8a1020, 0xbf8acffff10fbdbc),
(0xbc16d26d5f2efc04, 0x3f8659830d1bf014),
(0x3c212aabd12c483e, 0xbf82ff830f9799c4),
];
let mut p = DoubleDouble::quick_mul_f64_add(
DoubleDouble::from_bit_pair(C[9]),
x,
DoubleDouble::from_bit_pair(C[8]),
);
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[7]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[6]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[5]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[4]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[3]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[2]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[1]));
p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[0]));
p = DoubleDouble::quick_mult_f64(p, x);
p.to_f64()
}
pub fn f_sqrt1pm1(x: f64) -> f64 {
let ix = x.to_bits();
let ux = ix.wrapping_shl(1);
if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
if ux == 0 {
return 0.;
}
if ux.wrapping_shl(11) == 0 {
return if x.is_sign_negative() {
f64::NAN
} else {
f64::INFINITY
};
}
if ux <= 0x7960000000000000u64 {
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
))]
{
return f_fmla(-0.25 * x, 0.25 * x, x * 0.5);
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
)))]
{
use crate::common::dyad_fmla;
return if x < 1e-150 {
dyad_fmla(-0.25 * x, 0.25 * x, x * 0.5)
} else {
f_fmla(-0.25 * x, 0.25 * x, x * 0.5)
};
}
}
return f64::NAN; }
if (ix >> 63) != 0 && ux >= 0x7fe0000000000000u64 {
if ux == 0x7fe0000000000000u64 {
return -1.;
}
return f64::NAN;
}
if ux <= 0x7f1126e978d4fdf4u64 {
const C: [u64; 7] = [
0x3fe0000000000000,
0xbfc000000000009a,
0x3fb0000000000202,
0xbfa3fffffdf5a853,
0x3f9bfffffb3c75fe,
0xbf9500dd6aee8501,
0x3f9080d2ece21348,
];
let p = f_polyeval6(
x,
f64::from_bits(C[1]),
f64::from_bits(C[2]),
f64::from_bits(C[3]),
f64::from_bits(C[4]),
f64::from_bits(C[5]),
f64::from_bits(C[6]),
) * x;
let r = DoubleDouble::from_exact_add(f64::from_bits(C[0]), p);
let q = DoubleDouble::quick_mult_f64(r, x);
let err = f_fmla(
q.hi,
f64::from_bits(0x3c50000000000000), f64::from_bits(0x3bf0000000000000), );
let ub = q.hi + (q.lo + err);
let lb = q.hi + (q.lo - err);
if ub == lb {
return ub;
}
return sqrt1pm1_near_zero_hard(x);
}
#[cfg(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
))]
{
let r = DoubleDouble::from_full_exact_add(x, 1.0);
let v_sqrt = r.fast_sqrt();
let q = DoubleDouble::full_add_f64(v_sqrt, -1.0);
q.to_f64()
}
#[cfg(not(any(
all(
any(target_arch = "x86", target_arch = "x86_64"),
target_feature = "fma"
),
target_arch = "aarch64"
)))]
{
use crate::double_double::two_product_compatible;
if !two_product_compatible(x) {
let r = x + 1.;
let v_sqrt = r.sqrt();
DoubleDouble::from_full_exact_sub(v_sqrt, 1.0).to_f64()
} else {
let r = DoubleDouble::from_full_exact_add(x, 1.0);
let v_sqrt = r.fast_sqrt();
let q = DoubleDouble::full_add_f64(v_sqrt, -1.0);
q.to_f64()
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sqrt1pm1() {
assert_eq!(f_sqrt1pm1(15.), 3.0);
assert_eq!(f_sqrt1pm1(24.), 4.0);
assert_eq!(f_sqrt1pm1(8.), 2.0);
assert_eq!(f_sqrt1pm1(-0.75), -0.5);
assert_eq!(f_sqrt1pm1(0.5), 0.22474487139158905);
assert_eq!(f_sqrt1pm1(0.0005233212), 0.00026162637581973774);
assert_eq!(f_sqrt1pm1(-0.0005233212), -0.0002616948420951896);
assert_eq!(f_sqrt1pm1(-0.00000000000000000000005233212), -2.616606e-23);
assert_eq!(f_sqrt1pm1(0.00000000000000000000005233212), 2.616606e-23);
assert_eq!(f_sqrt1pm1(0.), 0.);
assert_eq!(f_sqrt1pm1(-0.), 0.);
assert_eq!(f_sqrt1pm1(f64::INFINITY), f64::INFINITY);
assert!(f_sqrt1pm1(f64::NEG_INFINITY).is_nan());
assert!(f_sqrt1pm1(f64::NAN).is_nan());
assert!(f_sqrt1pm1(-1.0001).is_nan());
}
}