pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 6/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
use crate::dyadic_float::{DyadicFloat128, DyadicSign};

// Logarithm range reduction - Step 3:
//   r(k) = 2^-21 round(2^21 / (1 + k*2^-21)) for k = -80 .. 80.
// Output range:
//   [-0x1.01928p-22 , 0x1p-22]
// We store S[i] = 2^21 (r(i - 80) - 1).
static S3: [i32; 161] = [
    0x50, 0x4f, 0x4e, 0x4d, 0x4c, 0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41,
    0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31,
    0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21,
    0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11,
    0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0, -0x1,
    -0x2, -0x3, -0x4, -0x5, -0x6, -0x7, -0x8, -0x9, -0xa, -0xb, -0xc, -0xd, -0xe, -0xf, -0x10,
    -0x11, -0x12, -0x13, -0x14, -0x15, -0x16, -0x17, -0x18, -0x19, -0x1a, -0x1b, -0x1c, -0x1d,
    -0x1e, -0x1f, -0x20, -0x21, -0x22, -0x23, -0x24, -0x25, -0x26, -0x27, -0x28, -0x29, -0x2a,
    -0x2b, -0x2c, -0x2d, -0x2e, -0x2f, -0x30, -0x31, -0x32, -0x33, -0x34, -0x35, -0x36, -0x37,
    -0x38, -0x39, -0x3a, -0x3b, -0x3c, -0x3d, -0x3e, -0x3f, -0x40, -0x41, -0x42, -0x43, -0x44,
    -0x45, -0x46, -0x47, -0x48, -0x49, -0x4a, -0x4b, -0x4c, -0x4d, -0x4e, -0x4f, -0x50,
];

// Logarithm range reduction - Step 4
//   r(k) = 2^-28 round(2^28 / (1 + k*2^-28)) for k = -65 .. 64.
// Output range:
//   [-0x1.0002143p-29 , 0x1p-29]
// We store S[i] = 2^28 (r(i - 65) - 1).
static S4: [i32; 130] = [
    0x41, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32,
    0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22,
    0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12,
    0x11, 0x10, 0xf, 0xe, 0xd, 0xc, 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
    -0x1, -0x2, -0x3, -0x4, -0x5, -0x6, -0x7, -0x8, -0x9, -0xa, -0xb, -0xc, -0xd, -0xe, -0xf,
    -0x10, -0x11, -0x12, -0x13, -0x14, -0x15, -0x16, -0x17, -0x18, -0x19, -0x1a, -0x1b, -0x1c,
    -0x1d, -0x1e, -0x1f, -0x20, -0x21, -0x22, -0x23, -0x24, -0x25, -0x26, -0x27, -0x28, -0x29,
    -0x2a, -0x2b, -0x2c, -0x2d, -0x2e, -0x2f, -0x30, -0x31, -0x32, -0x33, -0x34, -0x35, -0x36,
    -0x37, -0x38, -0x39, -0x3a, -0x3b, -0x3c, -0x3d, -0x3e, -0x3f, -0x40,
];

// Logarithm range reduction - Step 2:
//   r(k) = 2^-16 round(2^16 / (1 + k*2^-14)) for k = -2^6 .. 2^7.
// Output range:
//   [-0x1.3ffcp-15, 0x1.3e3dp-15]
// We store S2[i] = 2^16 (r(i - 2^6) - 1).
static S2: [i32; 193] = [
    0x101, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9,
    0xc5, 0xc1, 0xbd, 0xb9, 0xb4, 0xb0, 0xac, 0xa8, 0xa4, 0xa0, 0x9c, 0x98, 0x94, 0x90, 0x8c, 0x88,
    0x84, 0x80, 0x7c, 0x78, 0x74, 0x70, 0x6c, 0x68, 0x64, 0x60, 0x5c, 0x58, 0x54, 0x50, 0x4c, 0x48,
    0x44, 0x40, 0x3c, 0x38, 0x34, 0x30, 0x2c, 0x28, 0x24, 0x20, 0x1c, 0x18, 0x14, 0x10, 0xc, 0x8,
    0x4, 0x0, -0x4, -0x8, -0xc, -0x10, -0x14, -0x18, -0x1c, -0x20, -0x24, -0x28, -0x2c, -0x30,
    -0x34, -0x38, -0x3c, -0x40, -0x44, -0x48, -0x4c, -0x50, -0x54, -0x58, -0x5c, -0x60, -0x64,
    -0x68, -0x6c, -0x70, -0x74, -0x78, -0x7c, -0x80, -0x84, -0x88, -0x8c, -0x90, -0x94, -0x98,
    -0x9c, -0xa0, -0xa4, -0xa8, -0xac, -0xb0, -0xb4, -0xb7, -0xbb, -0xbf, -0xc3, -0xc7, -0xcb,
    -0xcf, -0xd3, -0xd7, -0xdb, -0xdf, -0xe3, -0xe7, -0xeb, -0xef, -0xf3, -0xf7, -0xfb, -0xff,
    -0x103, -0x107, -0x10b, -0x10f, -0x113, -0x117, -0x11b, -0x11f, -0x123, -0x127, -0x12b, -0x12f,
    -0x133, -0x137, -0x13a, -0x13e, -0x142, -0x146, -0x14a, -0x14e, -0x152, -0x156, -0x15a, -0x15e,
    -0x162, -0x166, -0x16a, -0x16e, -0x172, -0x176, -0x17a, -0x17e, -0x182, -0x186, -0x18a, -0x18e,
    -0x192, -0x195, -0x199, -0x19d, -0x1a1, -0x1a5, -0x1a9, -0x1ad, -0x1b1, -0x1b5, -0x1b9, -0x1bd,
    -0x1c1, -0x1c5, -0x1c9, -0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
    -0x1f0, -0x1f4, -0x1f8, -0x1fc,
];

// Perform logarithm range reduction steps 2-4.
// Inputs from the first step of range reduction:
//   m_x : the reduced argument after the first step of range reduction
//         satisfying  -2^-8 <= m_x < 2^-7  and  ulp(m_x) >= 2^-60.
//   idx1: index of the -log(r1) table from the first step.
// Outputs of the extra range reduction steps:
//   sum: adding -log(r1) - log(r2) - log(r3) - log(r4) to the resulted sum.
//   return value: the reduced argument v satisfying:
//                 -0x1.0002143p-29 <= v < 0x1p-29,  and  ulp(v) >= 2^(-125).
pub(crate) fn log_range_reduction(
    m_x: f64,
    log_table: &[&[DyadicFloat128]; 4],
    sum: DyadicFloat128,
) -> (DyadicFloat128, DyadicFloat128) {
    let v = (m_x * f64::from_bits(0x43b0000000000000)) as i64; // ulp = 2^-60

    // Range reduction - Step 2
    // Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15].
    // idx2 = trunc(2^14 * (v + 2^-8 + 2^-15))
    let idx2 = ((v.wrapping_add(0x10_2000_0000_0000)) >> 46) as usize;
    let z0 = log_table[1][idx2];
    let mut sum = sum + z0;

    let s2: i64 = S2[idx2] as i64; // |s| <= 2^-7, ulp = 2^-16
    let sv2 = s2 * v; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76
    let spv2 = s2.wrapping_shl(44).wrapping_add(v); // |s + v| < 2^-14, ulp = 2^-60
    let vv2 = spv2.wrapping_shl(16).wrapping_add(sv2); // |vv2| < 2^-14, ulp = 2^-76

    // Range reduction - Step 3
    // Output range: vv3 in [-0x1.01928p-22 , 0x1p-22]
    // idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22))
    let idx3 = vv2.wrapping_add(0x2840_0000_0000_0000).wrapping_shr(55) as usize;
    sum = sum + log_table[2][idx3];

    let s3 = S3[idx3] as i64; // |s| < 2^-13, ulp = 2^-21
    let spv3: i64 = s3.wrapping_shl(55).wrapping_add(vv2); // |s + v| < 2^-21, ulp = 2^-76
    // |s*v| < 2^-27, ulp = 2^(-76-21) = 2^-97
    let sv3: i128 = s3 as i128 * vv2 as i128;
    // |vv3| < 2^-21, ulp = 2^-97
    let vv3 = (spv3 as i128).wrapping_shl(21).wrapping_add(sv3);

    // Range reduction - Step 4
    // Output range: vv4 in [-0x1.0002143p-29 , 0x1p-29]
    // idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29))
    let idx4 = ((((vv3 >> 68) as i32).wrapping_add(131)) >> 1) as usize;

    let z4 = log_table[3][idx4];
    sum = sum + z4;

    let s4: i128 = S4[idx4] as i128; // |s| < 2^-21, ulp = 2^-28
    // |s + v| < 2^-28, ulp = 2^-97
    let spv4 = s4.wrapping_shl(69).wrapping_add(vv3);
    // |s*v| < 2^-42, ulp = 2^(-97-28) = 2^-125
    let sv4 = s4 * vv3;
    // |vv4| < 2^-28, ulp = 2^-125
    let vv4 = spv4.wrapping_shl(28).wrapping_add(sv4);

    let mut z0 = if vv4 < 0 {
        DyadicFloat128 {
            sign: DyadicSign::Neg,
            exponent: -125,
            mantissa: (-vv4) as u128,
        }
    } else {
        DyadicFloat128 {
            sign: DyadicSign::Pos,
            exponent: -125,
            mantissa: vv4 as u128,
        }
    };
    z0.normalize();

    (z0, sum)
}