#![allow(clippy::excessive_precision)]
use crate::bessel::alpha1::{
bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard,
};
use crate::bessel::beta1::{
bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval8, f_polyeval9, f_polyeval12, f_polyeval19};
use crate::rounding::CpuCeil;
use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
pub fn f_j1(x: f64) -> f64 {
let ux = x.to_bits().wrapping_shl(1);
if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
if ux <= 0x72338c9356bb0314u64 {
return x * 0.5;
}
if ux <= 0x7960000000000000u64 {
let quad_part_x = x * 0.125; return f_fmla(quad_part_x, -quad_part_x, 0.5) * x;
}
if x.is_infinite() {
return 0.;
}
return x + f64::NAN; }
let ax: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff;
if ax < 0x4052a6784230fcf8u64 {
if ax < 0x3feccccccccccccd {
return j1_maclaurin_series_fast(x);
}
return j1_small_argument_fast(x);
}
j1_asympt_fast(x)
}
#[inline]
fn j1_asympt_fast(x: f64) -> f64 {
let origin_x = x;
static SGN: [f64; 2] = [1., -1.];
let sign_scale = SGN[x.is_sign_negative() as usize];
let x = x.abs();
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
let recip = if x.to_bits() > 0x7fd000000000000u64 {
DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_div_fma(4.0, x), 0.25)
} else {
DoubleDouble::from_recip(x)
};
let alpha = bessel_1_asympt_alpha_fast(recip);
let beta = bessel_1_asympt_beta_fast(recip);
let AngleReduced { angle } = rem2pi_any(x);
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_sin = sin_dd_small_fast(r0);
let z0 = DoubleDouble::quick_mult(beta, m_sin);
let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
let p = DoubleDouble::quick_mult(scale, z0);
let err = f_fmla(
p.hi,
f64::from_bits(0x3be0000000000000), f64::from_bits(0x3a60000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub == lb {
return p.to_f64() * sign_scale;
}
j1_asympt(origin_x, recip, r_sqrt, angle)
}
fn j1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 {
let origin_x = x;
static SGN: [f64; 2] = [1., -1.];
let sign_scale = SGN[x.is_sign_negative() as usize];
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
let alpha = bessel_1_asympt_alpha(recip);
let beta = bessel_1_asympt_beta(recip);
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_sin = sin_dd_small(r0);
let z0 = DoubleDouble::quick_mult(beta, m_sin);
let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
let r = DoubleDouble::quick_mult(scale, z0);
let p = DoubleDouble::from_exact_add(r.hi, r.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3bc0000000000000), f64::from_bits(0x39c0000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub == lb {
return p.to_f64() * sign_scale;
}
j1_asympt_hard(origin_x)
}
#[inline]
pub(crate) fn j1_maclaurin_series_fast(x: f64) -> f64 {
const C0: DoubleDouble = DoubleDouble::from_bit_pair((0x3b30e9e087200000, 0x3fe0000000000000));
let x2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval12(
x2.hi,
f64::from_bits(0xbfb0000000000000),
f64::from_bits(0x3f65555555555555),
f64::from_bits(0xbf0c71c71c71c45e),
f64::from_bits(0x3ea6c16c16b82b02),
f64::from_bits(0xbe3845c87ec0cbef),
f64::from_bits(0x3dc27e0313e8534c),
f64::from_bits(0xbd4443dd2d0305d0),
f64::from_bits(0xbd0985a435fe9aa1),
f64::from_bits(0x3d10c82d92c46d30),
f64::from_bits(0xbd0aa3684321f219),
f64::from_bits(0x3cf8351f29ac345a),
f64::from_bits(0xbcd333fe6cd52c9f),
);
let mut z = DoubleDouble::mul_f64_add(x2, p, C0);
z = DoubleDouble::quick_mult_f64(z, x);
let err = f_fmla(
x2.hi,
f64::from_bits(0x3c70000000000000), f64::from_bits(0x3b40000000000000), );
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64();
}
j1_maclaurin_series(x)
}
pub(crate) fn j1_maclaurin_series(x: f64) -> f64 {
let origin_x = x;
static SGN: [f64; 2] = [1., -1.];
let sign_scale = SGN[x.is_sign_negative() as usize];
let x = x.abs();
const CL: [(u64, u64); 5] = [
(0xb930000000000000, 0x3fe0000000000000),
(0x39c8e80000000000, 0xbfb0000000000000),
(0x3c05555554f3add7, 0x3f65555555555555),
(0xbbac71c4eb0f8c94, 0xbf0c71c71c71c71c),
(0xbb3f56b7a43206d4, 0x3ea6c16c16c16c17),
];
let dx2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval8(
dx2.hi,
f64::from_bits(0xbe3845c8a0ce5129),
f64::from_bits(0x3dc27e4fb7789ea2),
f64::from_bits(0xbd4522a43f633af1),
f64::from_bits(0x3cc2c97589d53f97),
f64::from_bits(0xbc3ab8151dca7912),
f64::from_bits(0x3baf08732286d1d4),
f64::from_bits(0xbb10ac65637413f4),
f64::from_bits(0xbae4d8336e4f779c),
);
let mut p_e = DoubleDouble::mul_f64_add(dx2, p, DoubleDouble::from_bit_pair(CL[4]));
p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[3]));
p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[2]));
p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[1]));
p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[0]));
let p = DoubleDouble::quick_mult_f64(p_e, x);
let err = f_fmla(
p.hi,
f64::from_bits(0x3bd0000000000000), f64::from_bits(0x3a00000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j1_maclaurin_series_hard(origin_x);
}
p.to_f64() * sign_scale
}
#[cold]
#[inline(never)]
fn j1_maclaurin_series_hard(x: f64) -> f64 {
static SGN: [f64; 2] = [1., -1.];
let sign_scale = SGN[x.is_sign_negative() as usize];
let x = x.abs();
static C: [DyadicFloat128; 13] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -128,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -131,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -136,
mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -142,
mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -148,
mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -155,
mantissa: 0xc22e4506_72894ab6_cd8efb11_d33f5618_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -162,
mantissa: 0x93f27dbb_c4fae397_780b69f5_333c725b_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -170,
mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -178,
mantissa: 0x964bac6d_7ae67d8d_aec68405_485dea03_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -187,
mantissa: 0xd5c0f53a_fe6fa17f_8c7b0b68_39691f4e_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -196,
mantissa: 0xf8bb4be7_8e7896b0_58fee362_01a4370c_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -205,
mantissa: 0xf131bdf7_cff8d02e_e1ef6820_f9d58ab6_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -214,
mantissa: 0xc5e72c48_0d1aec75_3caa2e0d_edd008ca_u128,
},
];
let rx = DyadicFloat128::new_from_f64(x);
let dx = rx * rx;
let mut p = C[12];
for i in (0..12).rev() {
p = dx * p + C[i];
}
(p * rx).fast_as_f64() * sign_scale
}
#[inline]
pub(crate) fn j1_small_argument_fast(x: f64) -> f64 {
static SIGN: [f64; 2] = [1., -1.];
let sign_scale = SIGN[x.is_sign_negative() as usize];
let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
const INV_STEP: f64 = 0.6300176043004198;
let fx = x_abs * INV_STEP;
const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
let idx1 = unsafe {
fx.cpu_ceil()
.min(J1_ZEROS_COUNT)
.to_int_unchecked::<usize>()
};
let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
let dist0 = (found_zero0.hi - x_abs).abs();
let dist1 = (found_zero1.hi - x_abs).abs();
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
if idx == 0 {
return j1_maclaurin_series_fast(x);
}
let r = DoubleDouble::full_add_f64(-found_zero, x_abs);
if dist == 0. {
return f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale;
}
let is_zero_too_close = dist.abs() < 1e-3;
let c = if is_zero_too_close {
&J1_COEFFS_TAYLOR[idx - 1]
} else {
&J1_COEFFS[idx - 1]
};
let p = f_polyeval19(
r.hi,
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
f64::from_bits(c[9].1),
f64::from_bits(c[10].1),
f64::from_bits(c[11].1),
f64::from_bits(c[12].1),
f64::from_bits(c[13].1),
f64::from_bits(c[14].1),
f64::from_bits(c[15].1),
f64::from_bits(c[16].1),
f64::from_bits(c[17].1),
f64::from_bits(c[18].1),
f64::from_bits(c[19].1),
f64::from_bits(c[20].1),
f64::from_bits(c[21].1),
f64::from_bits(c[22].1),
f64::from_bits(c[23].1),
);
let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
let err = f_fmla(
z.hi,
f64::from_bits(0x3c70000000000000), f64::from_bits(0x3bf0000000000000), );
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64() * sign_scale;
}
j1_small_argument_dd(sign_scale, r, c)
}
fn j1_small_argument_dd(sign_scale: f64, r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), f64::from_bits(0x3a00000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j1_small_argument_path_hard(sign_scale, r, c);
}
p.to_f64() * sign_scale
}
#[cold]
#[inline(never)]
fn j1_small_argument_path_hard(sign_scale: f64, r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
let mut p = DoubleDouble::from_bit_pair(c[23]);
for i in (0..23).rev() {
p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i]));
p = DoubleDouble::from_exact_add(p.hi, p.lo);
}
p.to_f64() * sign_scale
}
#[cold]
#[inline(never)]
fn j1_asympt_hard(x: f64) -> f64 {
static SGN: [f64; 2] = [1., -1.];
let sign_scale = SGN[x.is_sign_negative() as usize];
let x = x.abs();
const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -128,
mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
};
const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -128,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
let x_dyadic = DyadicFloat128::new_from_f64(x);
let recip = DyadicFloat128::accurate_reciprocal(x);
let alpha = bessel_1_asympt_alpha_hard(recip);
let beta = bessel_1_asympt_beta_hard(recip);
let angle = rem2pi_f128(x_dyadic);
let x0pi34 = MPI_OVER_4 - alpha;
let r0 = angle + x0pi34;
let m_sin = sin_f128_small(r0);
let z0 = beta * m_sin;
let r_sqrt = bessel_rsqrt_hard(x, recip);
let scale = SQRT_2_OVER_PI * r_sqrt;
let p = scale * z0;
p.fast_as_f64() * sign_scale
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_j1() {
assert_eq!(f_j1(0.000000000000000000000000000000001241), 6.205e-34);
assert_eq!(f_j1(0.0000000000000000000000000000004321), 2.1605e-31);
assert_eq!(f_j1(0.00000000000000000004321), 2.1605e-20);
assert_eq!(f_j1(73.81695991658546), -0.06531447184607607);
assert_eq!(f_j1(0.01), 0.004999937500260416);
assert_eq!(f_j1(0.9), 0.4059495460788057);
assert_eq!(
f_j1(162605674999778540000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008686943178258183
);
assert_eq!(f_j1(3.831705970207517), -1.8501090915423025e-15);
assert_eq!(f_j1(-3.831705970207517), 1.8501090915423025e-15);
assert_eq!(f_j1(-6.1795701510782757E+307), 8.130935041593236e-155);
assert_eq!(
f_j1(0.000000000000000000000000000000000000008827127),
0.0000000000000000000000000000000000000044135635
);
assert_eq!(
f_j1(-0.000000000000000000000000000000000000008827127),
-0.0000000000000000000000000000000000000044135635
);
assert_eq!(f_j1(5.4), -0.3453447907795863);
assert_eq!(
f_j1(77.743162408196766932633181568235159),
0.09049267898021947
);
assert_eq!(
f_j1(84.027189586293545175976760219782591),
0.0870430264022591
);
assert_eq!(f_j1(f64::NEG_INFINITY), 0.0);
assert_eq!(f_j1(f64::INFINITY), 0.0);
assert!(f_j1(f64::NAN).is_nan());
}
}