implied-vol 2.0.0

use crate::fused_multiply_add::MulAdd;
use crate::lets_be_rational::SpecialFn;
use crate::lets_be_rational::special_function::normal_distribution;
use std::cmp::Ordering;
use std::f64::consts::FRAC_1_SQRT_2;
use std::ops::Neg;

#[inline(always)]
fn ab(z: f64) -> f64 {
    const A: [f64; 5] = [
        3.161_123_743_870_565_6,
        113.864_154_151_050_16,
        377.485_237_685_302,
        3_209.377_589_138_469_4,
        0.185_777_706_184_603_15,
    ];
    const B: [f64; 4] = [
        23.601_290_952_344_122,
        244.024_637_934_444_17,
        1_282.616_526_077_372_3,
        2_844.236_833_439_171,
    ];
    z.mul_add2(A[4], A[0])
        .mul_add2(z, A[1])
        .mul_add2(z, A[2])
        .mul_add2(z, A[3])
        / z.mul_add2(1.0, B[0])
            .mul_add2(z, B[1])
            .mul_add2(z, B[2])
            .mul_add2(z, B[3])
}

#[inline(always)]
fn cd(y: f64) -> f64 {
    const C: [f64; 9] = [
        0.564_188_496_988_670_1,
        8.883_149_794_388_377,
        66.119_190_637_141_63,
        298.635_138_197_400_1,
        881.952_221_241_769,
        1_712.047_612_634_070_7,
        2_051.078_377_826_071_6,
        1_230.339_354_797_997_2,
        2.153_115_354_744_038_3E-8,
    ];
    const D: [f64; 8] = [
        15.744_926_110_709_835,
        117.693_950_891_312_5,
        537.181_101_862_009_9,
        1_621.389_574_566_690_3,
        3_290.799_235_733_459_7,
        4_362.619_090_143_247,
        3_439.367_674_143_721_6,
        1_230.339_354_803_749_5,
    ];
    C[8].mul_add2(y, C[0])
        .mul_add2(y, C[1])
        .mul_add2(y, C[2])
        .mul_add2(y, C[3])
        .mul_add2(y, C[4])
        .mul_add2(y, C[5])
        .mul_add2(y, C[6])
        .mul_add2(y, C[7])
        / (y + D[0])
            .mul_add2(y, D[1])
            .mul_add2(y, D[2])
            .mul_add2(y, D[3])
            .mul_add2(y, D[4])
            .mul_add2(y, D[5])
            .mul_add2(y, D[6])
            .mul_add2(y, D[7])
}

#[inline(always)]
fn pq(z: f64) -> f64 {
    const P: [f64; 6] = [
        0.305_326_634_961_232_36,
        0.360_344_899_949_804_45,
        0.125_781_726_111_229_26,
        0.016_083_785_148_742_275,
        6.587_491_615_298_378e-4,
        0.016_315_387_137_302_097,
    ];
    const Q: [f64; 5] = [
        2.568_520_192_289_822,
        1.872_952_849_923_460_4,
        0.527_905_102_951_428_5,
        0.060_518_341_312_441_32,
        0.002_335_204_976_268_691_8,
    ];
    z * (P[5]
        .mul_add2(z, P[0])
        .mul_add2(z, P[1])
        .mul_add2(z, P[2])
        .mul_add2(z, P[3])
        .mul_add2(z, P[4]))
        / ((z + Q[0])
            .mul_add2(z, Q[1])
            .mul_add2(z, Q[2])
            .mul_add2(z, Q[3])
            .mul_add2(z, Q[4]))
}

#[inline(always)]
fn smoothened_exponential_of_negative_square(y: f64) -> f64 {
    let y_tilde = (y * 16.0).trunc() / 16.0;
    (y_tilde * y_tilde).neg().exp() * (-(y - y_tilde) * (y + y_tilde)).exp()
}

#[inline(always)]
fn smoothened_exponential_of_positive_square(x: f64) -> f64 {
    let x_tilde = (x * 16.0).trunc() / 16.0;
    (x_tilde * x_tilde).exp() * ((x - x_tilde) * (x + x_tilde)).exp()
}

const THRESHOLD: f64 = 0.46875;
const XNEG: f64 = -26.628;
const XBIG: f64 = 26.543;

#[inline(always)]
pub(super) fn erf_cody(x: f64) -> f64 {
    const ONE_OVER_SQRT_PI: f64 = 0.564_189_583_547_756_3;
    let y = x.abs();
    if y <= THRESHOLD {
        //       |x| <= 0.46875
        return x * ab(y * y);
    }
    // Compute erfc(|x|)
    let erfc_abs_x = if y >= XBIG {
        0.0 // when |x| ≥ 26.543
    } else if y <= 4.0 {
        cd(y) // when 0.46875 < |x| <= 4.0
    } else {
        (ONE_OVER_SQRT_PI - pq((y * y).recip())) / y // when 4.0 < |x| < 26.543
    } * smoothened_exponential_of_negative_square(y);
    if x < 0.0 {
        erfc_abs_x - 1.0
    } else {
        1.0 - erfc_abs_x
    }
}

#[inline(always)]
pub(super) fn erfc_cody(x: f64) -> f64 {
    let y = x.abs();
    if y <= THRESHOLD {
        return ab(y * y).neg().mul_add2(x, 1.0);
    }
    let erfc_abs_x = if y >= XBIG {
        0.0
    } else {
        (if y <= 4.0 {
            cd(y)
        } else {
            (FRAC_1_SQRT_2 - pq((y * y).recip())) / y
        }) * smoothened_exponential_of_negative_square(y)
    };

    if x < 0.0 {
        2.0 - erfc_abs_x
    } else {
        erfc_abs_x
    }
}

#[inline(always)]
#[allow(clippy::neg_cmp_op_on_partial_ord)] // for performance reason
fn erfcx_cody_above_threshold(y: f64) -> f64 {
    debug_assert!(matches!(
        y.partial_cmp(&THRESHOLD),
        Some(Ordering::Greater) | None
    ));
    if y <= 4.0 {
        cd(y)
    } else {
        (FRAC_1_SQRT_2 - pq((y * y).recip())) / y
    }
}

#[inline(always)]
pub(super) fn erfcx_cody(x: f64) -> f64 {
    let y = x.abs();
    if y <= THRESHOLD {
        let z = y * y;
        return z.exp() * (ab(z).neg().mul_add2(x, 1.0));
    }
    if x < XNEG {
        return f64::MAX;
    }
    let result = erfcx_cody_above_threshold(y);
    if x < 0.0 {
        let expx2 = smoothened_exponential_of_positive_square(x);
        (expx2 + expx2) - result
    } else {
        result
    }
}

#[inline(always)]
pub(super) fn one_minus_erfcx<SpFn: SpecialFn + ?Sized>(x: f64) -> f64 {
    if (-1.0 / 5.0..=1.0 / 3.0).contains(&x) {
        x * (x
            .mul_add2(1.406_928_571_363_456_5E-2, 1.406_918_874_460_965E-1)
            .mul_add2(x, 5.768_900_120_887_374E-1)
            .mul_add2(x, 1.151_496_718_178_475_6)
            .mul_add2(x, 1.000_000_000_000_000_2)
            / x.mul_add2(1.246_332_072_834_634_7E-2, 1.358_008_134_514_386E-1)
                .mul_add2(x, 6.248_608_165_864_026E-1)
                .mul_add2(x, 1.508_990_859_374_272_3)
                .mul_add2(x, 1.903_749_496_242_156_3)
                .mul_add2(x, 1.0))
        .mul_add2(-x, std::f64::consts::FRAC_2_SQRT_PI)
    } else {
        1.0 - SpFn::erfcx(x)
    }
}

#[inline(always)]
pub(super) fn erfinv(e: f64) -> f64 {
    if e.abs() < 2.0 * normal_distribution::U_MAX {
        normal_distribution::inverse_norm_cdfm_half_for_midrange_probabilities(0.5 * e)
            * FRAC_1_SQRT_2
    } else {
        (if e < 0.0 {
            normal_distribution::inverse_norm_cdf_for_low_probabilities(e.mul_add2(0.5, 0.5))
        } else {
            -normal_distribution::inverse_norm_cdf_for_low_probabilities(e.mul_add2(-0.5, 0.5))
        }) * FRAC_1_SQRT_2
    }
}

#[cfg(test)]
mod tests {
    use crate::lets_be_rational::special_function::error_function::{
        THRESHOLD, XBIG, XNEG, erfc_cody, erfcx_cody,
    };

    #[test]
    fn calerf_1() {
        let x = erfc_cody(THRESHOLD + f64::EPSILON);
        assert_eq!(x, 0.507_386_526_782_061_8);
        let x = erfc_cody(THRESHOLD - f64::EPSILON);
        assert_eq!(x, 0.507_386_526_782_062_3);
        let x = erfc_cody(-THRESHOLD - f64::EPSILON);
        assert_eq!(x, 1.492_613_473_217_938_1);
        let x = erfc_cody(-THRESHOLD + f64::EPSILON);
        assert_eq!(x, 1.492_613_473_217_937_7);

        let x = erfc_cody(4.0 + f64::EPSILON);
        assert_eq!(x, 1.541_725_790_028_002e-8);
        let x = erfc_cody(4.0 - f64::EPSILON);
        assert_eq!(x, 1.541_725_790_028_002e-8);
        let x = erfc_cody(-4.0 - f64::EPSILON);
        assert_eq!(x, 1.999_999_984_582_742);
        let x = erfc_cody(-4.0 + f64::EPSILON);
        assert_eq!(x, 1.999_999_984_582_742);

        let x = erfc_cody(XBIG + f64::EPSILON);
        assert_eq!(x, 0.0);
        let x = erfc_cody(XBIG - f64::EPSILON);
        assert_eq!(x, 0.0);
        let x = erfc_cody(-XBIG - f64::EPSILON);
        assert_eq!(x, 2.0);
        let x = erfc_cody(-XBIG + f64::EPSILON);
        assert_eq!(x, 2.0);

        let x = erfc_cody(0.0 + f64::EPSILON);
        assert_eq!(x, 0.999_999_999_999_999_8);
        let x = erfc_cody(0.0 - f64::EPSILON);
        assert_eq!(x, 1.000_000_000_000_000_2);

        let x = erfc_cody(XNEG + f64::EPSILON);
        assert_eq!(x, 2.0);
        let x = erfc_cody(XNEG - f64::EPSILON);
        assert_eq!(x, 2.0);
    }

    #[test]
    fn calerf_2() {
        let x = erfcx_cody(THRESHOLD + f64::EPSILON);
        assert_eq!(x, 0.632_069_689_249_555_9);
        let x = erfcx_cody(THRESHOLD - f64::EPSILON);
        assert_eq!(x, 0.632_069_689_249_556_3);
        let x = erfcx_cody(-THRESHOLD - f64::EPSILON);
        assert_eq!(x, 1.859_402_416_871_422_7);
        let x = erfcx_cody(-THRESHOLD + f64::EPSILON);
        assert_eq!(x, 1.859_402_416_871_421_4);

        let x = erfcx_cody(4.0 + f64::EPSILON);
        assert_eq!(x, 0.136_999_457_625_061_4);
        let x = erfcx_cody(4.0 - f64::EPSILON);
        assert_eq!(x, 0.136_999_457_625_061_4);
        let x = erfcx_cody(-4.0 - f64::EPSILON);
        assert_eq!(x, 17_772_220.904_016_286);
        let x = erfcx_cody(-4.0 + f64::EPSILON);
        assert_eq!(x, 17_772_220.904_016_286);

        let x = erfcx_cody(XBIG + f64::EPSILON);
        assert_eq!(x, 0.026_624_994_527_838_793);
        let x = erfcx_cody(XBIG - f64::EPSILON);
        assert_eq!(x, 0.026_624_994_527_838_793);
        let x = erfcx_cody(-XBIG - f64::EPSILON);
        assert_eq!(x, 1.883_172_254_751_470_6e306);
        let x = erfcx_cody(-XBIG + f64::EPSILON);
        assert_eq!(x, 1.883_172_254_751_470_6e306);

        let x = erfcx_cody(0.0 + f64::EPSILON);
        assert_eq!(x, 0.999_999_999_999_999_8);
        let x = erfcx_cody(0.0 - f64::EPSILON);
        assert_eq!(x, 1.000_000_000_000_000_2);

        let x = erfcx_cody(XNEG + f64::EPSILON);
        assert_eq!(x, 1.728_618_506_590_026e308);
        let x = erfcx_cody(XNEG - f64::EPSILON);
        assert_eq!(x, 1.728_618_506_590_026e308);
    }
}