pxfm 0.1.29

Fast and accurate math
Documentation
/*
 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Given 2^-1074 <= x <= 0x1.fffffffffffffp+1023, this routine puts in h+l
   an approximation of log(x) such that |l| < 2^-23.89*|h| and

   | h + l - log(x) | <= elog * |log x|

   with elog = 2^-73.527  if x < 1/sqrt(2) or sqrt(2) < x,
   and  elog = 2^-67.0544 if 1/sqrt(2) < x < sqrt(2)
   (note that x cannot equal 1/sqrt(2) nor sqrt(2)).
*/
use crate::common::dd_fmla;
use crate::double_double::DoubleDouble;
use crate::pow_exec::log_poly_1;
use crate::pow_tables::POW_INVERSE;

// Generated by SageMath:
// values = POW_INVERSE
// R = RealField(150)
//
// def hex_to_float(h):
//     return struct.unpack('>d', struct.pack('>Q', h))[0]
//
// real_array = [R(hex_to_float(h)) for h in values]
//
// for r in real_array:
//     print_double_double("", -RealField(180)(r).log())
pub(crate) static FAST_LOG_DD_INV: [(u64, u64); 182] = [
    (0x3c6bc60efafc6f6e, 0xbfd5ff3070a793d4),
    (0x3c78ebcb7dee9a3d, 0xbfd5a42ab0f4cfe2),
    (0x3c6819cf7e308ddb, 0xbfd548a2c3add263),
    (0x3c742a87d977dc5e, 0xbfd4ec973260026a),
    (0x3c69ffc341f177dc, 0xbfd49006804009d1),
    (0x3c729931715ac903, 0xbfd432ef2a04e814),
    (0x3c70bcfb6082ce6d, 0xbfd404308686a7e4),
    (0x3c6c68651945f97c, 0xbfd3a64c556945ea),
    (0x3c64dd4c580919f8, 0xbfd347dd9a987d55),
    (0x3c78f4cdb95ebdf9, 0xbfd2e8e2bae11d31),
    (0xbc77ad24c13f040e, 0xbfd2895a13de86a3),
    (0x3c776f5eb09628af, 0xbfd22941fbcf7966),
    (0x3c7c9fdf9a0c4b07, 0xbfd1f8ff9e48a2f3),
    (0xbc79d3d1b0e4d147, 0xbfd1980d2dd4236f),
    (0xbc77b66298edd24a, 0xbfd136870293a8b0),
    (0xbc589fa0ab4cb31d, 0xbfd1058bf9ae4ad5),
    (0xbc77dcfde8061c03, 0xbfd0a324e27390e3),
    (0x3c628ec217a5022d, 0xbfd0402594b4d041),
    (0x3c6caaae64f21acb, 0xbfcfb9186d5e3e2b),
    (0xbc2c5f6dfd018c37, 0xbfcf550a564b7b37),
    (0x3c46e03a39bfc89b, 0xbfce8c0252aa5a60),
    (0x3c461578001e0162, 0xbfce27076e2af2e6),
    (0xbc66e443597e4d40, 0xbfcd5c216b4fbb91),
    (0x3c64f689f8434012, 0xbfcc8ff7c79a9a22),
    (0x3c673dee38a3fb6b, 0xbfcc2968558c18c1),
    (0xbc6ba27fdc19e1a0, 0xbfcb5b519e8fb5a4),
    (0x3c5398cff3641985, 0xbfcaf3c94e80bff3),
    (0xbc493711b07a998c, 0xbfca23bc1fe2b563),
    (0xbc6575e31f003e0c, 0xbfc9bb362e7dfb83),
    (0x3c6569d851a56770, 0xbfc8e928de886d41),
    (0x3c6bf7fdbfa08d9a, 0xbfc87fa06520c911),
    (0xbc4be36b2d6a0608, 0xbfc7ab890210d909),
    (0x3c5b264062a84cdb, 0xbfc740f8f54037a5),
    (0x3c6caae268ecd179, 0xbfc6d60fe719d21d),
    (0x3c5bc60efafc6f6e, 0xbfc5ff3070a793d4),
    (0x3c565d22aa8ad7cf, 0xbfc59338d9982086),
    (0xbc668981bcc36756, 0xbfc4ba36f39a55e5),
    (0xbc69f4f6543e1f88, 0xbfc44d2b6ccb7d1e),
    (0x3c5ab3a8e7d81017, 0xbfc3dfc2b0ecc62a),
    (0x3c06b9c7d96091fa, 0xbfc303d718e47fd3),
    (0xbc6301771c407dbf, 0xbfc29552f81ff523),
    (0xbc6f547bf1809e88, 0xbfc2266f190a5acb),
    (0xbc6a28813e3a7f07, 0xbfc14785846742ac),
    (0xbc69a5dc5e9030ac, 0xbfc0d77e7cd08e59),
    (0xbc550c647eb86499, 0xbfc0671512ca596e),
    (0xbc585f325c5bbacd, 0xbfbf0a30c01162a6),
    (0x3c361578001e0162, 0xbfbe27076e2af2e6),
    (0xbc5790dd951d90fa, 0xbfbd4313d66cb35d),
    (0xbc35d617ef8161b1, 0xbfbc5e548f5bc743),
    (0xbc5942f48aa70ea9, 0xbfba926d3a4ad563),
    (0x3c42099e1c184e8e, 0xbfb9ab42462033ad),
    (0x3c24a697ab3424a9, 0xbfb8c345d6319b21),
    (0x3c5eeedfcdd94131, 0xbfb7da766d7b12cd),
    (0x3c5388458ec21b6a, 0xbfb60658a93750c4),
    (0xbc5a49e39a1a8be4, 0xbfb51b073f06183f),
    (0xbc4ddd4f935996c9, 0xbfb42edcbea646f0),
    (0x3c5b599f227becbb, 0xbfb341d7961bd1d1),
    (0x3c1c125963fc4cfd, 0xbfb253f62f0a1417),
    (0x3c379da3e8c22cda, 0xbfb16536eea37ae1),
    (0xbc485f325c5bbacd, 0xbfaf0a30c01162a6),
    (0xbc21e3c53257fd47, 0xbfad276b8adb0b52),
    (0x3c3eb9759c130499, 0xbfab42dd711971bf),
    (0xbc4f5a0e80520bf2, 0xbfa95c830ec8e3eb),
    (0xbc418d3ca87b9296, 0xbfa77458f632dcfc),
    (0x3c4ce55c2b4e2b72, 0xbfa58a5bafc8e4d5),
    (0x3c45bfa937f551bb, 0xbfa39e87b9febd60),
    (0x3c3e9ae889bac481, 0xbfa1b0d98923d980),
    (0xbc333e3f04f1ef23, 0xbf9f829b0e783300),
    (0x3bf0ae69229dc868, 0xbf9b9fc027af9198),
    (0x3c35b602ace3a510, 0xbf97b91b07d5b11b),
    (0x3c10cb5a902b3a1c, 0xbf93cea44346a575),
    (0x3c183092c59642a1, 0xbf8fc0a8b0fc03e4),
    (0x3c116d7687d3df21, 0xbf87dc475f810a77),
    (0x3bce44b7e3711ebf, 0xbf7fe02a6b106789),
    (0x0000000000000000, 0x0000000000000000),
    (0x0000000000000000, 0x0000000000000000),
    (0x3bec14b9f9377a1d, 0x3f78121214586b54),
    (0xbc2c5517f64bc223, 0x3f841929f96832f0),
    (0x3c2806208c04c220, 0x3f8c317384c75f06),
    (0xbc2cd7b66e01c26d, 0x3f9228fb1fea2e28),
    (0xbbf8ed4d357c9c97, 0x3f963d6178690bd6),
    (0x3c1ec1a5f86d41f9, 0x3f9a55f548c5c43f),
    (0x3c375b44595cab18, 0x3f9e72bf2813ce51),
    (0x3c4c05cf1d753622, 0x3fa0415d89e74444),
    (0xbc4947f792615916, 0x3fa252f32f8d183f),
    (0xbc4cdd6f7f4a137e, 0x3fa466aed42de3ea),
    (0x3c40413e6505e603, 0x3fa67c94f2d4bb58),
    (0x3c3a8be97660a23d, 0x3fa894aa149fb343),
    (0x3c2a353bb42e0add, 0x3faaaef2d0fb10fc),
    (0x3c3e5cf3a0f56f72, 0x3fabbcebfc68f420),
    (0x3c44e6c986f44c55, 0x3fadda8adc67ee4e),
    (0xbc4cd9f1f95c2eed, 0x3faffa6911ab9301),
    (0xbc5a4a128d192686, 0x3fb10e45b3cae831),
    (0xbc5cc0fbce104eaa, 0x3fb2207b5c78549e),
    (0xbc5d15d38d2fa3f7, 0x3fb2aa04a44717a5),
    (0x3c47a976d3b5b45f, 0x3fb3bdf5a7d1ee64),
    (0x3c5769f42c7842cc, 0x3fb4d3115d207eac),
    (0xbc545f9d61c68c1b, 0x3fb55e10050e0384),
    (0xbc59acd8b33f8fdc, 0x3fb674f089365a7a),
    (0x3c5abca5b4fdb880, 0x3fb78d02263d82d3),
    (0x3c3b9f2dffbeed43, 0x3fb8197e2f40e3f0),
    (0xbc5478a85704ccb7, 0x3fb9335e5d594989),
    (0xbc55b5ca203e4259, 0x3fba4e7640b1bc38),
    (0x3c537d8f39bee659, 0x3fbadc77ee5aea8c),
    (0xbc4cdc9f6f5f38c7, 0x3fbbf968769fca11),
    (0x3c49daf7df76ad2a, 0x3fbd179788219364),
    (0x3c5401fa71733019, 0x3fbda727638446a2),
    (0xbc4a2bf991780d3f, 0x3fbec739830a1120),
    (0xbc59361574fb24e2, 0x3fbf57bc7d9005db),
    (0x3c639e2d3f8b7d10, 0x3fc03cdc0a51ec0d),
    (0xbc6dd7009902bf32, 0x3fc08598b59e3a07),
    (0xbc50e63a5f01c691, 0x3fc1178e8227e47c),
    (0xbc62d56ff61c2bfb, 0x3fc160c8024b27b1),
    (0x3c462c9ef939ac5d, 0x3fc1f3b925f25d41),
    (0xbc66e38161051d69, 0x3fc23d712a49c202),
    (0xbc5499a3f25af95f, 0x3fc2d1610c86813a),
    (0xbc5c4716bdfc0cc9, 0x3fc31b994d3a4f85),
    (0x3c370d6cdf05266c, 0x3fc3b08b6757f2a9),
    (0xbc6d87e6a354d056, 0x3fc3fb45a59928cc),
    (0xbc50d5604930f135, 0x3fc4913d8333b561),
    (0xbc6927d47803c5f4, 0x3fc4dc7b897bc1c8),
    (0x3c64f4d710fec38e, 0x3fc5737cc9018cdd),
    (0xbc21f5b44c0df7e7, 0x3fc5bf406b543db2),
    (0xbc3d34f0f4621bed, 0x3fc6574ebe8c133a),
    (0x3c696332bd4b341f, 0x3fc6a399dabbd383),
    (0xbc68de59c21e166c, 0x3fc6f0128b756abc),
    (0x3c5ef8f6ebcfb201, 0x3fc7898d85444c73),
    (0xbc4ac5f0c075b847, 0x3fc7d6903caf5ad0),
    (0x3c6d685f35eea2a0, 0x3fc871213750e994),
    (0x3c555aa8b6997a40, 0x3fc8beafeb38fe8c),
    (0x3c6054473941ad99, 0x3fc90c6db9fcbcd9),
    (0x3c6f47dfd871f87f, 0x3fc9a8778debaa38),
    (0x3c435a19605e67ef, 0x3fc9f6c407089664),
    (0x3c5df207dc5c34c6, 0x3fca454082e6ab05),
    (0x3c6ab5ca9eaa088a, 0x3fcae2ca6f672bd4),
    (0xbc66353ab386a94d, 0x3fcb31d8575bce3d),
    (0x3c3a0ee735d9f0ec, 0x3fcb811730b823d2),
    (0x3c3dd355f6a516d7, 0x3fcbd087383bd8ad),
    (0xbc68e58b2c57a4a5, 0x3fcc6ffbc6f00f71),
    (0x3c653d154280394f, 0x3fccc000c9db3c52),
    (0x3c660629242471a2, 0x3fcd1037f2655e7b),
    (0x3c5aa11d49f96cb9, 0x3fcdb13db0d48940),
    (0x3c5fea48dd7b81d1, 0x3fce020cc6235ab5),
    (0x3c42276041f43042, 0x3fce530effe71012),
    (0xbc6d33919ab94074, 0x3fcea4449f04aaf5),
    (0xbc527c77ded76aad, 0x3fcf474b134df229),
    (0x3c6f665066f980a2, 0x3fcf991c6cb3b379),
    (0x3c28de00938b4c40, 0x3fcfeb2233ea07cd),
    (0xbc418290bd2932e2, 0x3fd01eae5626c691),
    (0xbc70779634061cbc, 0x3fd047e60cde83b8),
    (0x3c643c2e68684d53, 0x3fd09aa572e6c6d4),
    (0x3c5162c79d5d11ee, 0x3fd0c42d676162e3),
    (0xbc692b49ef282b09, 0x3fd0edd060b78081),
    (0xbc60e63a5f01c691, 0x3fd1178e8227e47c),
    (0x3c1e0936abd4fa6e, 0x3fd14167ef367783),
    (0x3c766fbd28b40935, 0x3fd16b5ccbacfb73),
    (0xbc612aeb84249223, 0x3fd1bf99635a6b95),
    (0x3c7512c3749a1e4e, 0x3fd1e9e1678899f4),
    (0x3c6f7ae91aeba60a, 0x3fd214456d0eb8d4),
    (0x3c3bb75d1addf870, 0x3fd23ec5991eba49),
    (0x3c7e0efadd9db02b, 0x3fd269621134db92),
    (0xbc6856e61c515740, 0x3fd2941afb186b7c),
    (0xbc782dad7fd86088, 0x3fd2bef07cdc9354),
    (0xbc73d69909e5c3dc, 0x3fd314f1e1d35ce4),
    (0xbc5cd55b8a4746c0, 0x3fd3401e12aecba1),
    (0xbc5324f0e883858e, 0x3fd36b6776be1117),
    (0xbc5ce2b31b31e8b0, 0x3fd396ce359bbf54),
    (0xbc72ad27e50a8ec6, 0x3fd3c25277333184),
    (0x3c783d680d3c1084, 0x3fd3edf463c1683e),
    (0x3c60dbb243827392, 0x3fd419b423d5e8c7),
    (0xbc72b125247b0fa5, 0x3fd44591e0539f49),
    (0x3c38fb4c14c56eef, 0x3fd4718dc271c41b),
    (0xbc69964a168ccaca, 0x3fd49da7f3bcc41f),
    (0xbc5123615b147a5d, 0x3fd4c9e09e172c3c),
    (0xbc758cb3124b9245, 0x3fd4f637ebba9810),
    (0xbc68f7e9b38a6979, 0x3fd522ae0738a3d8),
    (0xbc7aacfdbbdab914, 0x3fd54f431b7be1a9),
    (0xbc60908d15f88b63, 0x3fd57bf753c8d1fb),
    (0xbc5e6c2bdfb3e037, 0x3fd5a8cadbbedfa1),
    (0xbc76541148cbb8a2, 0x3fd5d5bddf595f30),
    (0xbc56e8920c09b73f, 0x3fd602d08af091ec),
    (0x3c6dc18ce51fff99, 0x3fd630030b3aac49),
];

#[inline]
pub(crate) fn fast_log_dd(ddx: DoubleDouble) -> DoubleDouble {
    // We'll compute log((z+1)+1) as log(xh+xl) = log(xh) + log(1+xl/xh).
    // since xl/xh < ulp(xh) we'll use for log(1+xl/xh)
    // one taylor term what means that log(1+xl/xh) = log_lo + O(x^2)
    let log_lo = if ddx.hi <= f64::from_bits(0x7fd0000000000000) || ddx.lo.abs() >= 4.0 {
        ddx.lo / ddx.hi
    } else {
        0.
    }; // avoid spurious underflow

    let x_u = ddx.hi.to_bits();
    let mut m = x_u & 0xfffffffffffff;
    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;

    let t;
    if e != 0 {
        t = m | (0x3ffu64 << 52);
        m = m.wrapping_add(1u64 << 52);
        e -= 0x3ff;
    } else {
        /* x is a subnormal double  */
        let k = m.leading_zeros() - 11;

        e = -0x3fei64 - k as i64;
        m = m.wrapping_shl(k);
        t = m | (0x3ffu64 << 52);
    }

    /* now |x| = 2^_e*_t = 2^(_e-52)*m with 1 <= _t < 2,
    and 2^52 <= _m < 2^53 */

    //   log(x) = log(t) + E · log(2)
    let mut t = f64::from_bits(t);

    // If m > sqrt(2) we divide it by 2 so ensure 1/sqrt(2) < t < sqrt(2)
    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u64; 2] = [44, 45];

    e = e.wrapping_add(c as i64);
    let be = e;
    let i = m >> CM[c]; /* i/2^8 <= t < (i+1)/2^8 */
    /* when c=1, we have 0x16a09e667f3bcd <= m < 2^53, thus 90 <= i <= 127;
    when c=0, we have 2^52 <= m < 0x16a09e667f3bcd, thus 128 <= i <= 181 */
    t *= CY[c];
    /* now 0x1.6a09e667f3bcdp-1 <= t < 0x1.6a09e667f3bcdp+0,
    and log(x) = E * log(2) + log(t) */

    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
    let log_r = DoubleDouble::from_bit_pair(FAST_LOG_DD_INV[(i - 181) as usize]);

    let z = dd_fmla(r, t, -1.0);

    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c7abc9e3b39803f),
        f64::from_bits(0x3fe62e42fefa39ef),
    );

    let dt = DoubleDouble::mul_f64_add(LOG2_DD, be as f64, log_r);

    let mut v = DoubleDouble::f64_add(dt.hi, DoubleDouble::new(dt.lo, z));
    let p = log_poly_1(z);
    v = DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo + log_lo, p.hi));

    v
}

#[inline]
pub(crate) fn fast_log_d_to_dd(ddx: f64) -> DoubleDouble {
    // We'll compute log((z+1)+1) as log(xh+xl) = log(xh) + log(1+xl/xh).
    // since xl/xh < ulp(xh) we'll use for log(1+xl/xh)
    // one taylor term what means that log(1+xl/xh) = log_lo + O(x^2)

    let x_u = ddx.to_bits();
    let mut m = x_u & 0xfffffffffffff;
    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;

    let t;
    if e != 0 {
        t = m | (0x3ffu64 << 52);
        m = m.wrapping_add(1u64 << 52);
        e -= 0x3ff;
    } else {
        /* x is a subnormal double  */
        let k = m.leading_zeros() - 11;

        e = -0x3fei64 - k as i64;
        m = m.wrapping_shl(k);
        t = m | (0x3ffu64 << 52);
    }

    /* now |x| = 2^_e*_t = 2^(_e-52)*m with 1 <= _t < 2,
    and 2^52 <= _m < 2^53 */

    //   log(x) = log(t) + E · log(2)
    let mut t = f64::from_bits(t);

    // If m > sqrt(2) we divide it by 2 so ensure 1/sqrt(2) < t < sqrt(2)
    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
    static CY: [f64; 2] = [1.0, 0.5];
    static CM: [u64; 2] = [44, 45];

    e = e.wrapping_add(c as i64);
    let be = e;
    let i = m >> CM[c]; /* i/2^8 <= t < (i+1)/2^8 */
    /* when c=1, we have 0x16a09e667f3bcd <= m < 2^53, thus 90 <= i <= 127;
    when c=0, we have 2^52 <= m < 0x16a09e667f3bcd, thus 128 <= i <= 181 */
    t *= CY[c];
    /* now 0x1.6a09e667f3bcdp-1 <= t < 0x1.6a09e667f3bcdp+0,
    and log(x) = E * log(2) + log(t) */

    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
    let log_r = DoubleDouble::from_bit_pair(FAST_LOG_DD_INV[(i - 181) as usize]);

    let z = dd_fmla(r, t, -1.0);

    const LOG2_DD: DoubleDouble = DoubleDouble::new(
        f64::from_bits(0x3c7abc9e3b39803f),
        f64::from_bits(0x3fe62e42fefa39ef),
    );

    let dt = DoubleDouble::mul_f64_add(LOG2_DD, be as f64, log_r);

    let mut v = DoubleDouble::f64_add(dt.hi, DoubleDouble::new(dt.lo, z));
    let p = log_poly_1(z);
    v = DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo, p.hi));

    DoubleDouble::from_exact_add(v.hi, v.lo)
}