use crate::bessel::alpha0::{
bessel_0_asympt_alpha, bessel_0_asympt_alpha_fast, bessel_0_asympt_alpha_hard,
};
use crate::bessel::beta0::{
bessel_0_asympt_beta, bessel_0_asympt_beta_fast, bessel_0_asympt_beta_hard,
};
use crate::bessel::i0::bessel_rsqrt_hard;
use crate::bessel::j0_coeffs_remez::J0_COEFFS_REMEZ;
use crate::bessel::j0_coeffs_taylor::J0_COEFFS_TAYLOR;
use crate::bessel::j0f_coeffs::{J0_ZEROS, J0_ZEROS_VALUE};
use crate::common::f_fmla;
use crate::double_double::DoubleDouble;
use crate::dyadic_float::{DyadicFloat128, DyadicSign};
use crate::polyeval::{f_polyeval9, f_polyeval10, f_polyeval12, f_polyeval19};
use crate::rounding::CpuCeil;
use crate::sin_helper::{cos_dd_small, cos_dd_small_fast, cos_f128_small};
use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
pub fn f_j0(x: f64) -> f64 {
let ux = x.to_bits().wrapping_shl(1);
if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
if ux <= 0x77723ef88126da90u64 {
return 1.;
}
if ux <= 0x7960000000000000u64 {
let half_x = 0.5 * x; return f_fmla(half_x, -half_x, 1.);
}
if x.is_infinite() {
return 0.;
}
return x + f64::NAN; }
let x_abs = x.to_bits() & 0x7fff_ffff_ffff_ffff;
let ax = f64::from_bits(x_abs);
if x_abs <= 0x4052b33333333333u64 {
if x_abs <= 0x3ff199999999999au64 {
return j0_maclaurin_series_fast(ax);
}
return j0_small_argument_fast(ax);
}
j0_asympt_fast(ax)
}
#[inline]
pub(crate) fn j0_maclaurin_series_fast(x: f64) -> f64 {
const C: [u64; 12] = [
0x3ff0000000000000,
0xbfd0000000000000,
0x3f90000000000000,
0xbf3c71c71c71c71c,
0x3edc71c71c71c71c,
0xbe723456789abcdf,
0x3e002e85c0898b71,
0xbd8522a43f65486a,
0x3d0522a43f65486a,
0xbc80b313289be0b9,
0x3bf5601885e63e5d,
0xbb669ca9cf3b7f54,
];
let dx2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval10(
dx2.hi,
f64::from_bits(C[2]),
f64::from_bits(C[3]),
f64::from_bits(C[4]),
f64::from_bits(C[5]),
f64::from_bits(C[6]),
f64::from_bits(C[7]),
f64::from_bits(C[8]),
f64::from_bits(C[9]),
f64::from_bits(C[10]),
f64::from_bits(C[11]),
);
let mut z = DoubleDouble::mul_f64_add_f64(dx2, p, f64::from_bits(C[1]));
z = DoubleDouble::mul_add_f64(dx2, z, f64::from_bits(C[0]));
let err = f_fmla(
dx2.hi,
f64::from_bits(0x3c70000000000000), f64::from_bits(0x3b40000000000000), );
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64();
}
j0_maclaurin_series(x)
}
#[cold]
pub(crate) fn j0_maclaurin_series(x: f64) -> f64 {
const C: [(u64, u64); 12] = [
(0x0000000000000000, 0x3ff0000000000000),
(0x0000000000000000, 0xbfd0000000000000),
(0x0000000000000000, 0x3f90000000000000),
(0xbbdc71c71c71c71c, 0xbf3c71c71c71c71c),
(0x3b7c71c71c71c71c, 0x3edc71c71c71c71c),
(0xbab23456789abcdf, 0xbe723456789abcdf),
(0xba8b6edec0692e65, 0x3e002e85c0898b71),
(0x3a2604db055bd075, 0xbd8522a43f65486a),
(0xb9a604db055bd075, 0x3d0522a43f65486a),
(0x3928824198c6f6e1, 0xbc80b313289be0b9),
(0xb869b0b430eb27b8, 0x3bf5601885e63e5d),
(0x380ee6b4638f3a25, 0xbb669ca9cf3b7f54),
];
let dx2 = DoubleDouble::from_exact_mult(x, x);
let p = f_polyeval12(
dx2,
DoubleDouble::from_bit_pair(C[0]),
DoubleDouble::from_bit_pair(C[1]),
DoubleDouble::from_bit_pair(C[2]),
DoubleDouble::from_bit_pair(C[3]),
DoubleDouble::from_bit_pair(C[4]),
DoubleDouble::from_bit_pair(C[5]),
DoubleDouble::from_bit_pair(C[6]),
DoubleDouble::from_bit_pair(C[7]),
DoubleDouble::from_bit_pair(C[8]),
DoubleDouble::from_bit_pair(C[9]),
DoubleDouble::from_bit_pair(C[10]),
DoubleDouble::from_bit_pair(C[11]),
);
let r = DoubleDouble::from_exact_add(p.hi, p.lo);
const ERR: f64 = f64::from_bits(0x39d0000000000000); let ub = r.hi + (r.lo + ERR);
let lb = r.hi + (r.lo - ERR);
if ub == lb {
return r.to_f64();
}
j0_maclaurin_series_hard(x)
}
#[cold]
#[inline(never)]
pub(crate) fn j0_maclaurin_series_hard(x: f64) -> f64 {
static P: [DyadicFloat128; 12] = [
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -127,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -129,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -133,
mantissa: 0x80000000_00000000_00000000_00000000_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -139,
mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -145,
mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -151,
mantissa: 0x91a2b3c4_d5e6f809_1a2b3c4d_5e6f8092_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -158,
mantissa: 0x81742e04_4c5b8724_8909fcb6_8cd4e410_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -166,
mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -174,
mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -182,
mantissa: 0x85989944_df05c4ef_b7cce721_23e1b391_u128,
},
DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -191,
mantissa: 0xab00c42f_31f2e799_3d2f3c53_6120e5d8_u128,
},
DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -200,
mantissa: 0xb4e54e79_dbfa9c23_29738e18_bb602809_u128,
},
];
let dx = DyadicFloat128::new_from_f64(x);
let x2 = dx * dx;
let mut p = P[11];
for i in (0..11).rev() {
p = x2 * p + P[i];
}
p.fast_as_f64()
}
#[inline]
pub(crate) fn j0_small_argument_fast(x: f64) -> f64 {
const INV_STEP: f64 = 0.6299043751549631;
let fx = x * INV_STEP;
const J0_ZEROS_COUNT: f64 = (J0_ZEROS.len() - 1) as f64;
let idx0 = unsafe { fx.min(J0_ZEROS_COUNT).to_int_unchecked::<usize>() };
let idx1 = unsafe {
fx.cpu_ceil()
.min(J0_ZEROS_COUNT)
.to_int_unchecked::<usize>()
};
let found_zero0 = DoubleDouble::from_bit_pair(J0_ZEROS[idx0]);
let found_zero1 = DoubleDouble::from_bit_pair(J0_ZEROS[idx1]);
let dist0 = (found_zero0.hi - x).abs();
let dist1 = (found_zero1.hi - x).abs();
let (found_zero, idx, dist) = if dist0 < dist1 {
(found_zero0, idx0, dist0)
} else {
(found_zero1, idx1, dist1)
};
if idx == 0 {
return j0_maclaurin_series_fast(x);
}
let is_too_close_too_zero = dist.abs() < 1e-3;
let c = if is_too_close_too_zero {
&J0_COEFFS_TAYLOR[idx - 1]
} else {
&J0_COEFFS_REMEZ[idx - 1]
};
let r = DoubleDouble::full_add_f64(-found_zero, x.abs());
if dist == 0. {
return f64::from_bits(J0_ZEROS_VALUE[idx]);
}
let p = f_polyeval19(
r.hi,
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
f64::from_bits(c[9].1),
f64::from_bits(c[10].1),
f64::from_bits(c[11].1),
f64::from_bits(c[12].1),
f64::from_bits(c[13].1),
f64::from_bits(c[14].1),
f64::from_bits(c[15].1),
f64::from_bits(c[16].1),
f64::from_bits(c[17].1),
f64::from_bits(c[18].1),
f64::from_bits(c[19].1),
f64::from_bits(c[20].1),
f64::from_bits(c[21].1),
f64::from_bits(c[22].1),
f64::from_bits(c[23].1),
);
let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
let err = f_fmla(
z.hi,
f64::from_bits(0x3c70000000000000), f64::from_bits(0x3bf0000000000000), );
let ub = z.hi + (z.lo + err);
let lb = z.hi + (z.lo - err);
if ub == lb {
return z.to_f64();
}
j0_small_argument_dd(r, c)
}
#[cold]
fn j0_small_argument_dd(r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
let c = &c0[15..];
let p0 = f_polyeval9(
r.to_f64(),
f64::from_bits(c[0].1),
f64::from_bits(c[1].1),
f64::from_bits(c[2].1),
f64::from_bits(c[3].1),
f64::from_bits(c[4].1),
f64::from_bits(c[5].1),
f64::from_bits(c[6].1),
f64::from_bits(c[7].1),
f64::from_bits(c[8].1),
);
let c = c0;
let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3c10000000000000), f64::from_bits(0x3a90000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub != lb {
return j0_small_argument_hard(r, c);
}
p.to_f64()
}
#[cold]
#[inline(never)]
fn j0_small_argument_hard(r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
let mut p = DoubleDouble::from_bit_pair(c[23]);
for i in (0..23).rev() {
p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i]));
p = DoubleDouble::from_exact_add(p.hi, p.lo);
}
p.to_f64()
}
#[inline]
pub(crate) fn j0_asympt_fast(x: f64) -> f64 {
let x = x.abs();
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
let recip = if x.to_bits() > 0x7fd000000000000u64 {
DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_div_fma(4.0, x), 0.25)
} else {
DoubleDouble::from_recip(x)
};
let alpha = bessel_0_asympt_alpha_fast(recip);
let beta = bessel_0_asympt_beta_fast(recip);
let AngleReduced { angle } = rem2pi_any(x);
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_cos = cos_dd_small_fast(r0);
let z0 = DoubleDouble::quick_mult(beta, m_cos);
let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
let p = DoubleDouble::quick_mult(scale, z0);
let err = f_fmla(
p.hi,
f64::from_bits(0x3be0000000000000), f64::from_bits(0x3a60000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub == lb {
return p.to_f64();
}
j0_asympt(x, recip, r_sqrt, angle)
}
pub(crate) fn j0_asympt(
x: f64,
recip: DoubleDouble,
r_sqrt: DoubleDouble,
angle: DoubleDouble,
) -> f64 {
const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc8cbc0d30ebfd15),
f64::from_bits(0x3fe9884533d43651),
);
const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
f64::from_bits(0xbc81a62633145c07),
f64::from_bits(0xbfe921fb54442d18),
);
let alpha = bessel_0_asympt_alpha(recip);
let beta = bessel_0_asympt_beta(recip);
let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
let m_cos = cos_dd_small(r0);
let z0 = DoubleDouble::quick_mult(beta, m_cos);
let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
let r = DoubleDouble::quick_mult(scale, z0);
let p = DoubleDouble::from_exact_add(r.hi, r.lo);
let err = f_fmla(
p.hi,
f64::from_bits(0x3bd0000000000000), f64::from_bits(0x39e0000000000000), );
let ub = p.hi + (p.lo + err);
let lb = p.hi + (p.lo - err);
if ub == lb {
return p.to_f64();
}
j0_asympt_hard(x)
}
#[cold]
#[inline(never)]
pub(crate) fn j0_asympt_hard(x: f64) -> f64 {
const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Pos,
exponent: -128,
mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
};
const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
sign: DyadicSign::Neg,
exponent: -128,
mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
};
let x_dyadic = DyadicFloat128::new_from_f64(x);
let recip = DyadicFloat128::accurate_reciprocal(x);
let alpha = bessel_0_asympt_alpha_hard(recip);
let beta = bessel_0_asympt_beta_hard(recip);
let angle = rem2pi_f128(x_dyadic);
let x0pi34 = MPI_OVER_4 - alpha;
let r0 = angle + x0pi34;
let m_sin = cos_f128_small(r0);
let z0 = beta * m_sin;
let r_sqrt = bessel_rsqrt_hard(x, recip);
let scale = SQRT_2_OVER_PI * r_sqrt;
let p = scale * z0;
p.fast_as_f64()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_j0() {
assert_eq!(f_j0(f64::EPSILON), 1.0);
assert_eq!(f_j0(-0.000000000000000000000532), 1.0);
assert_eq!(f_j0(0.0000000000000000000532), 1.0);
assert_eq!(f_j0(-2.000976555054876), 0.22332760641907712);
assert_eq!(f_j0(-2.3369499004222215E+304), -3.3630754230844632e-155);
assert_eq!(
f_j0(f64::from_bits(0xd71a31ffe2ff7e9f)),
f64::from_bits(0xb2e58532f95056ff)
);
assert_eq!(f_j0(6.1795701510782757E+307), 6.075192922402001e-155);
assert_eq!(f_j0(6.1795701510782757E+301), 4.118334155030934e-152);
assert_eq!(f_j0(6.1795701510782757E+157), 9.5371668900364e-80);
assert_eq!(f_j0(79.), -0.08501719554953485);
#[cfg(any(
all(target_arch = "x86_64", target_feature = "fma"),
target_arch = "aarch64"
))]
assert_eq!(f_j0(93.463718781944774171190), 2.7038169012530046e-16);
assert_eq!(f_j0(99.746819858680596470279979), -8.419106281522749e-17);
assert_eq!(f_j0(f64::INFINITY), 0.);
assert_eq!(f_j0(f64::NEG_INFINITY), 0.);
assert!(f_j0(f64::NAN).is_nan());
}
}