pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::polyeval::f_polyeval6;

/// Computes sqrt(1+x) - 1
pub fn f_sqrt1pm1f(x: f32) -> f32 {
    // filter out exceptional cases
    let ix = x.to_bits();
    let ux = ix.wrapping_shl(1);
    if ux >= 0xffu32 << 24 || ux <= 0x68000000u32 {
        // |x| <= f32::EPSILON, |x| == inf, x == NaN
        if ux == 0 {
            // |x| == 0
            return 0.;
        }
        if ux.wrapping_shl(8) == 0 {
            // |x| == Inf
            return if x.is_sign_negative() {
                f32::NAN
            } else {
                f32::INFINITY
            };
        }

        if ux <= 0x68000000u32 {
            // |x| <= f32::EPSILON
            // Sqrt(1+x) - 1 ~ x/2-x^2/8 + O(x^3)
            #[cfg(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            ))]
            {
                use crate::common::f_fmlaf;
                return f_fmlaf(-0.25 * x, 0.25 * x, x * 0.5);
            }
            #[cfg(not(any(
                all(
                    any(target_arch = "x86", target_arch = "x86_64"),
                    target_feature = "fma"
                ),
                target_arch = "aarch64"
            )))]
            {
                use crate::common::f_fmla;
                let dx = x as f64;
                return f_fmla(-0.25 * dx, 0.25 * dx, dx * 0.5) as f32;
            }
        }

        return f32::NAN; // x == NaN
    }
    if (ix >> 31) != 0 && ux >= 0x7f00_0000 {
        // x < 0 and x >= 1
        if ux == 0x7f00_0000u32 {
            // x == -1
            return -1.;
        }
        return f32::NAN;
    }

    if ux <= 0x78eb_851eu32 {
        // |x| <= 0.015

        // Polynomial generated by Sollya:
        // d = [-0.015, 0.015];
        // sqrt1pm1 = sqrt(x + 1) - 1;
        // Q = fpminimax(sqrt1pm1, [|1,2,3,4,5,6|], [|D...|], d);
        const C: [u64; 6] = [
            0x3fe0000000000034,
            0xbfc00000000002ee,
            0x3faffffffc0d541c,
            0xbfa3fffffa546ce5,
            0x3f9c016d2be05c25,
            0xbf95016d1ae9e058,
        ];
        let dx = x as f64;
        let p = f_polyeval6(
            dx,
            f64::from_bits(C[0]),
            f64::from_bits(C[1]),
            f64::from_bits(C[2]),
            f64::from_bits(C[3]),
            f64::from_bits(C[4]),
            f64::from_bits(C[5]),
        ) * dx;
        return p as f32;
    }

    let dx = x as f64;
    ((dx + 1.).sqrt() - 1.) as f32
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sqrt1pm1f() {
        assert_eq!(f_sqrt1pm1f(-0.9), -0.6837722);
        assert_eq!(f_sqrt1pm1f(24.), 4.);
        assert_eq!(f_sqrt1pm1f(-0.75), -0.5);
        assert_eq!(f_sqrt1pm1f(8.), 2.);
        assert_eq!(f_sqrt1pm1f(9.), 2.1622777);
        assert_eq!(f_sqrt1pm1f(12.31), 2.6482873);
        assert_eq!(f_sqrt1pm1f(0.005231), 0.0026120886);
        assert_eq!(f_sqrt1pm1f(0.00000000005231), 2.6155e-11);
        assert_eq!(f_sqrt1pm1f(-0.00000000005231), -2.6155e-11);
        assert_eq!(f_sqrt1pm1f(0.), 0.);
        assert_eq!(f_sqrt1pm1f(-0.), 0.);
        assert_eq!(f_sqrt1pm1f(f32::INFINITY), f32::INFINITY);
        assert!(f_sqrt1pm1f(f32::NEG_INFINITY).is_nan());
        assert!(f_sqrt1pm1f(f32::NAN).is_nan());
        assert!(f_sqrt1pm1f(-1.0001).is_nan());
    }
}