deep_time/
utils.rs

1use crate::Real;
2
3/// Clamps an `i128` to the representable range of `i64`.
4#[inline]
5pub(crate) const fn clamp_i128_to_i64(x: i128) -> i64 {
6    if x > i64::MAX as i128 {
7        i64::MAX
8    } else if x < i64::MIN as i128 {
9        i64::MIN
10    } else {
11        x as i64
12    }
13}
14
15/// Ultra-high-accuracy sine approximation that can be evaluated at compile time.
16///
17/// This is a private helper function used internally for high-precision
18/// astronomical time transformations (primarily TDB-TT and related
19/// relativistic corrections). It provides a balance of accuracy, speed,
20/// and `const fn` compatibility without requiring any runtime tables or
21/// external dependencies.
22///
23/// # Algorithm
24///
25/// 1. **Range reduction**  
26///    The input angle is reduced to the interval `[-π, π]` using floating-point
27///    modulo, followed by a second reduction to `[0, π/2]` by exploiting the
28///    identities `sin(-x) = -sin(x)` and `sin(π - x) = sin(x)`.
29///
30/// 2. **Taylor series (Horner form)**  
31///    The sine is computed using the Taylor series for `sin(x)` around zero,
32///    truncated after the `x¹⁵` term and evaluated with Horner's method for
33///    optimal numerical stability and minimal operations:
34///
35///    ```text
36///    sin(x) = x − x³/3! + x⁵/5! − x⁷/7! + x⁹/9! − x¹¹/11! + x¹³/13! − x¹⁵/15!
37///    ```
38///
39///    All powers of `x²` are accumulated via repeated multiplication by `y = x²`.
40///
41/// # Accuracy
42///
43/// - **Maximum absolute error**: ≈ **6.02 × 10⁻¹²** radians over the entire
44///   real line (the worst case occurs near odd multiples of `π/2`).
45/// - This is more than **100×** better than the previous 7-term version and
46///   over **9,000×** better than the original 5-term implementation.
47/// - For all practical TDB-TT and astronomical time-scale work the error is
48///   completely negligible — it is smaller than the inherent uncertainty of
49///   most input ephemerides.
50///
51/// # Performance & Const-fn Properties
52///
53/// - Entirely `const fn` compatible (no heap allocation, no panics on valid input).
54/// - Uses only multiplication, addition/subtraction, and a single modulo.
55/// - Horner evaluation requires only **8 multiplications and 8 additions** after
56///   range reduction — still extremely cheap.
57/// - No conditional branches inside the polynomial evaluation.
58///
59/// # Limitations
60///
61/// - For extremely large arguments (`|x| ≳ 10¹⁴`), the floating-point modulo
62///   operation loses precision because the mantissa of `f64` can no longer
63///   represent the fractional part of `x / (2π)` accurately. In practice this
64///   is irrelevant for all astronomical time arguments encountered in TDB-TT
65///   calculations.
66/// - The function does not handle `NaN` or `Infinity` specially; they propagate
67///   according to IEEE 754 rules.
68///
69/// # Design Rationale
70///
71/// The implementation uses a plain Taylor series (instead of a minimax or
72/// Chebyshev polynomial) because:
73/// - The coefficients are simple exact fractions (`1/n!`).
74/// - The series is trivial to extend or truncate.
75/// - Horner form already gives near-optimal accuracy for the chosen degree
76///   on `[0, π/2]`.
77///
78/// One more term (`+x¹⁷/17!`) can be added if future requirements ever demand
79/// sub-10⁻¹³ accuracy.
80///
81/// # See Also
82///
83/// - Previous versions in git history (5-term, 7-term) for regression testing.
84/// - `core::f64::consts::PI` and the `f!` macro for literal typing.
85pub const fn sin_approx(x: Real) -> Real {
86    const PI: Real = f!(core::f64::consts::PI);
87    const TWO_PI: Real = f!(2.0) * PI;
88
89    // === Range reduction to [-π, π] ===
90    // Uses the mathematical identity sin(x) = sin(x mod 2π).
91    // The two-step adjustment guarantees the result lies in [-π, π].
92    let mut x = x % TWO_PI;
93    if x < f!(0.0) {
94        x += TWO_PI;
95    }
96    if x > PI {
97        x -= TWO_PI;
98    }
99
100    // === Sign handling and reduction to [0, π/2] ===
101    // sin(-x) = -sin(x)  and  sin(π - x) = sin(x)
102    let sign = if x < f!(0.0) { f!(-1.0) } else { f!(1.0) };
103    let x = x.abs();
104
105    let x = if x > PI / f!(2.0) { PI - x } else { x };
106
107    // === Taylor series via Horner's method (up to x¹⁵ term) ===
108    // y = x²
109    // p(y) = 1 − y/3! + y²/5! − y³/7! + y⁴/9! − y⁵/11! + y⁶/13! − y⁷/15!
110    // sin(x) = x · p(y)
111    //
112    // Horner form is used for:
113    //   • minimal number of operations (8 muls + 8 adds)
114    //   • excellent floating-point rounding properties
115    //   • trivial extensibility
116    let y = x * x;
117
118    // Start with the highest-degree coefficient and work downwards.
119    // All denominators are exact factorials and are exactly representable
120    // as f64 constants.
121    let p = f!(-1.0) / f!(1307674368000.0); // −1/15!
122    let p = p * y + f!(1.0) / f!(6227020800.0); // +1/13!
123    let p = p * y + f!(-1.0) / f!(39916800.0); // −1/11!
124    let p = p * y + f!(1.0) / f!(362880.0); // +1/9!
125    let p = p * y + f!(-1.0) / f!(5040.0); // −1/7!
126    let p = p * y + f!(1.0) / f!(120.0); // +1/5!
127    let p = p * y + f!(-1.0) / f!(6.0); // −1/3!
128    let p = p * y + f!(1.0); // +1
129
130    sign * (x * p)
131}
132
133/// `const fn` implementation of floor for `Real`.
134///
135/// This is identical to `std::f64::floor` (including signed-zero
136/// preservation) while remaining fully `const fn` on stable Rust with `#![no_std]`.
137pub(crate) const fn floor_f(x: Real) -> Real {
138    if x.is_nan() || x.is_infinite() {
139        x
140    } else if x == f!(0.0) {
141        x // preserves +0.0 or -0.0
142    } else {
143        let i = x as i64;
144        let truncated = f!(i);
145        if x >= f!(0.0) || truncated == x {
146            truncated
147        } else {
148            truncated - f!(1.0)
149        }
150    }
151}
152
153const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */
154const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */
155const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */
156const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */
157const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */
158const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */
159const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */
160const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */
161const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */
162
163/// The natural logarithm of `x` (f64).
164pub const fn log(mut x: f64) -> f64 {
165    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
166
167    let mut ui = x.to_bits();
168    let mut hx: u32 = (ui >> 32) as u32;
169    let mut k: i32 = 0;
170
171    if (hx < 0x00100000) || ((hx >> 31) != 0) {
172        /* x < 2**-126  */
173        if ui << 1 == 0 {
174            return -1. / (x * x); /* log(+-0)=-inf */
175        }
176        if hx >> 31 != 0 {
177            return (x - x) / 0.0; /* log(-#) = NaN */
178        }
179        /* subnormal number, scale x up */
180        k -= 54;
181        x *= x1p54;
182        ui = x.to_bits();
183        hx = (ui >> 32) as u32;
184    } else if hx >= 0x7ff00000 {
185        return x;
186    } else if hx == 0x3ff00000 && ui << 32 == 0 {
187        return 0.;
188    }
189
190    /* reduce x into [sqrt(2)/2, sqrt(2)] */
191    hx += 0x3ff00000 - 0x3fe6a09e;
192    k += ((hx >> 20) as i32) - 0x3ff;
193    hx = (hx & 0x000fffff) + 0x3fe6a09e;
194    ui = ((hx as u64) << 32) | (ui & 0xffffffff);
195    x = f64::from_bits(ui);
196
197    let f: f64 = x - 1.0;
198    let hfsq: f64 = 0.5 * f * f;
199    let s: f64 = f / (2.0 + f);
200    let z: f64 = s * s;
201    let w: f64 = z * z;
202    let t1: f64 = w * (LG2 + w * (LG4 + w * LG6));
203    let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7)));
204    let r: f64 = t2 + t1;
205    let dk: f64 = k as f64;
206    s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
207}
208
209// musl-style Table-driven Goldschmidt sqrt for f64
210// Translated from musl/src/math/sqrt.c and sqrt_data.c
211// Provides correctly rounded sqrt(x) matching IEEE 754 / libm quality
212
213const RSQRT_TAB: [u16; 128] = [
214    0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa,
215    0xa592, 0xa480, 0xa373, 0xa26b, 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
216    0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, 0x936b, 0x92a9, 0x91ea, 0x912e,
217    0x9075, 0x8fbe, 0x8f0a, 0x8e59, 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
218    0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, 0x83ec, 0x8361, 0x82d8, 0x8250,
219    0x81c9, 0x8145, 0x80c2, 0x8040, 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
220    0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, 0xe443, 0xe2dc, 0xe17a, 0xe020,
221    0xdecb, 0xdd7d, 0xdc34, 0xdaf1, 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
222    0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, 0xc858, 0xc764, 0xc674, 0xc587,
223    0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
224    0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
225];
226
227#[inline]
228const fn mul32(a: u32, b: u32) -> u32 {
229    ((a as u64).wrapping_mul(b as u64) >> 32) as u32
230}
231
232#[inline]
233const fn mul64(a: u64, b: u64) -> u64 {
234    let ahi = a >> 32;
235    let alo = a & 0xffffffff;
236    let bhi = b >> 32;
237    let blo = b & 0xffffffff;
238    ahi.wrapping_mul(bhi)
239        .wrapping_add(ahi.wrapping_mul(blo) >> 32)
240        .wrapping_add(alo.wrapping_mul(bhi) >> 32)
241}
242
243/// Computes sqrt(x) using the table-driven Goldschmidt iteration
244/// from musl libc. Correctly rounded to nearest-even for all f64 inputs.
245/// const, no std, no alloc friendly.
246pub const fn sqrt(x: f64) -> f64 {
247    let mut ix = x.to_bits();
248    let mut top = ix >> 52;
249
250    // Special cases: subnormal, inf, nan, negative, zero
251    if top.wrapping_sub(0x001) >= 0x7fe {
252        if ix << 1 == 0 {
253            return x; // ±0.0
254        }
255        if ix == 0x7ff0_0000_0000_0000 {
256            return x; // +inf
257        }
258        if ix > 0x7ff0_0000_0000_0000 {
259            // negative or NaN → quiet NaN, preserve sign bit for -inf/-num
260            let nan_bits = 0x7ff8_0000_0000_0000 | (ix & 0x8000_0000_0000_0000);
261            return f64::from_bits(nan_bits);
262        }
263        // Subnormal: normalize by multiplying by 2^52
264        let scale = f64::from_bits(0x4330_0000_0000_0000); // 2^52
265        ix = (x * scale).to_bits();
266        top = (ix >> 52).wrapping_sub(52);
267    }
268
269    let even = top & 1;
270    let mut m = (ix << 11) | 0x8000_0000_0000_0000u64;
271    if even != 0 {
272        m >>= 1;
273    }
274    let top = (top.wrapping_add(0x3ff)) >> 1; // result exponent (biased)
275
276    // Table-driven initial reciprocal sqrt estimate + Goldschmidt iterations
277    // All vars u64 to match C closely; mul32/mul64 return u64 for simplicity
278    let three: u64 = 0xc000_0000;
279    let i = ((ix >> 46) % 128) as usize;
280    let mut r: u64 = (RSQRT_TAB[i] as u64) << 16;
281
282    let mut s: u64 = mul32((m >> 32) as u32, r as u32) as u64;
283    let mut d: u64 = mul32(s as u32, r as u32) as u64;
284    let mut u: u64 = three - d;
285    r = (mul32(r as u32, u as u32) << 1) as u64;
286    s = (mul32(s as u32, u as u32) << 1) as u64;
287
288    d = mul32(s as u32, r as u32) as u64;
289    u = three - d;
290    r = (mul32(r as u32, u as u32) << 1) as u64;
291
292    r <<= 32;
293    s = mul64(m, r);
294    d = mul64(s, r);
295    u = (three << 32) - d;
296    s = mul64(s, u);
297
298    // Final adjustment and rounding decision
299    s = (s - 2) >> 9;
300
301    let d0 = (m << 42).wrapping_sub(s.wrapping_mul(s));
302    let d1 = s.wrapping_sub(d0);
303    let _d2 = d1.wrapping_add(s).wrapping_add(1);
304
305    if (d1 >> 63) != 0 {
306        s = s.wrapping_add(1);
307    }
308    s &= 0x000f_ffff_ffff_ffff;
309    s |= (top as u64) << 52;
310
311    f64::from_bits(s)
312}
313
314const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
315
316const fn sq(x: f64) -> (f64, f64) {
317    let xh: f64;
318    let xl: f64;
319    let xc: f64;
320
321    xc = x * SPLIT;
322    xh = x - xc + xc;
323    xl = x - xh;
324    let hi = x * x;
325    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
326    (hi, lo)
327}
328
329pub const fn hypot(mut x: f64, mut y: f64) -> f64 {
330    let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
331    let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
332
333    let mut uxi = x.to_bits();
334    let mut uyi = y.to_bits();
335    let uti;
336    let ex: i64;
337    let ey: i64;
338    let mut z: f64;
339
340    /* arrange |x| >= |y| */
341    uxi &= -1i64 as u64 >> 1;
342    uyi &= -1i64 as u64 >> 1;
343    if uxi < uyi {
344        uti = uxi;
345        uxi = uyi;
346        uyi = uti;
347    }
348
349    /* special cases */
350    ex = (uxi >> 52) as i64;
351    ey = (uyi >> 52) as i64;
352    x = f64::from_bits(uxi);
353    y = f64::from_bits(uyi);
354    /* note: hypot(inf,nan) == inf */
355    if ey == 0x7ff {
356        return y;
357    }
358    if ex == 0x7ff || uyi == 0 {
359        return x;
360    }
361    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
362    /* 64 difference is enough for ld80 double_t */
363    if ex - ey > 64 {
364        return x + y;
365    }
366
367    /* precise sqrt argument in nearest rounding mode without overflow */
368    /* xh*xh must not overflow and xl*xl must not underflow in sq */
369    z = 1.;
370    if ex > 0x3ff + 510 {
371        z = x1p700;
372        x *= x1p_700;
373        y *= x1p_700;
374    } else if ey < 0x3ff - 450 {
375        z = x1p_700;
376        x *= x1p700;
377        y *= x1p700;
378    }
379    let (hx, lx) = sq(x);
380    let (hy, ly) = sq(y);
381    z * sqrt(ly + lx + hy + hx)
382}
383
384#[cfg(feature = "std")]
385#[cfg(test)]
386mod tests {
387    use super::sqrt;
388    use std::{eprintln, f64, vec, vec::Vec};
389
390    #[test]
391    fn test_special_cases() {
392        assert_eq!(sqrt(0.0), 0.0);
393        assert_eq!(sqrt(-0.0), -0.0);
394        assert!(sqrt(f64::INFINITY).is_infinite() && sqrt(f64::INFINITY) > 0.0);
395        assert!(sqrt(f64::NEG_INFINITY).is_nan());
396        assert!(sqrt(-1.0).is_nan());
397        assert!(sqrt(f64::NAN).is_nan());
398        // signaling nan? but in practice quiet
399    }
400
401    #[test]
402    fn test_perfect_squares() {
403        for i in 0..100u32 {
404            let x = (i * i) as f64;
405            let r = sqrt(x);
406            assert!((r - i as f64).abs() < 1e-10 || r.is_nan());
407        }
408    }
409
410    #[test]
411    fn test_random_vs_std() {
412        // 5M deterministic LCG random normals in [1,2) — exercises table + Goldschmidt fully
413        let mut failures = 0u32;
414        let mut state: u64 = 0x123456789abcdef0;
415        for _ in 0..5_000_000 {
416            state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
417            let bits = (state & 0x000f_ffff_ffff_ffff) | 0x3ff0_0000_0000_0000; // positive normal [1,2)
418            let val = f64::from_bits(bits);
419            let r1 = sqrt(val);
420            let r2 = val.sqrt();
421            if r1.to_bits() != r2.to_bits() {
422                failures += 1;
423                if failures < 3 {
424                    eprintln!(
425                        "Mismatch at {:016x}: ours={:016x} std={:016x}",
426                        bits,
427                        r1.to_bits(),
428                        r2.to_bits()
429                    );
430                }
431            }
432        }
433        assert_eq!(
434            failures, 0,
435            "Found {} mismatches in 5M random normals [1,2)",
436            failures
437        );
438    }
439
440    #[test]
441    fn test_subnormals_random() {
442        // 100k random subnormals (exp=0) — critical for normalize path
443        let mut failures = 0u32;
444        let mut state: u64 = 0xdeadbeefcafebabe;
445        for _ in 0..100_000 {
446            state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
447            // subnormal: exp=0, random mantissa (low 52 bits)
448            let bits = state & 0x000f_ffff_ffff_ffff; // clears sign + exp
449            let val = f64::from_bits(bits);
450            if val == 0.0 {
451                continue;
452            } // skip zero
453            let r1 = sqrt(val);
454            let r2 = val.sqrt();
455            if r1.to_bits() != r2.to_bits() {
456                failures += 1;
457                if failures < 3 {
458                    eprintln!(
459                        "Subnormal mismatch at {:016x}: ours={:016x} std={:016x}",
460                        bits,
461                        r1.to_bits(),
462                        r2.to_bits()
463                    );
464                }
465            }
466        }
467        assert_eq!(
468            failures, 0,
469            "Found {} mismatches in 100k random subnormals",
470            failures
471        );
472    }
473
474    #[test]
475    fn test_boundaries() {
476        // Critical boundaries: min/max normal, subnormal boundary, overflow edge, powers of 2
477        let boundaries: [f64; 8] = [
478            f64::MIN_POSITIVE,                         // 2^-1022 (smallest normal)
479            f64::from_bits(0x0010_0000_0000_0000),     // 2^-1021
480            f64::from_bits(0x000f_ffff_ffff_ffff),     // largest subnormal
481            2.0f64.powi(-1074),                        // smallest positive subnormal (2^-1074)
482            f64::MAX,                                  // ~1.8e308
483            f64::from_bits(0x7fe0_0000_0000_0000),     // largest finite < inf
484            2.0f64.powi(1023),                         // 2^1023 (largest power of 2)
485            2.0f64.powi(-1022) * (1.0 + f64::EPSILON), // just above min normal
486        ];
487        for &x in &boundaries {
488            let r1 = sqrt(x);
489            let r2 = x.sqrt();
490            assert_eq!(r1.to_bits(), r2.to_bits(), "Boundary mismatch for {:e}", x);
491            // Also check sqrt(x*x) ~ |x| for positive x (within rounding), but skip underflow cases
492            if x > 0.0 && x.is_finite() && x > 1e-200 {
493                let xx = x * x;
494                if xx.is_finite() && xx.is_normal() {
495                    let r = sqrt(xx);
496                    let rel = ((r - x).abs() / x).max(0.0);
497                    assert!(
498                        rel < 1e-14 || r.is_nan(),
499                        "sqrt(x*x) not close to x for {}",
500                        x
501                    );
502                }
503            }
504        }
505    }
506
507    #[test]
508    fn test_known_hard_cases() {
509        // Known hard-to-round / exact / boundary cases — all must match std bit-exactly
510        let cases: &[f64] = &[
511            2.0,
512            0.5,
513            4.0,
514            9.0,
515            0.0,
516            f64::INFINITY,
517            1.0e-300,                              // very small normal
518            f64::from_bits(0x0010_0000_0000_0001), // just above min normal
519            1.0 + f64::EPSILON,                    // next after 1.0
520            f64::from_bits(0x7fefffffffffffff),    // largest finite
521        ];
522        for &x in cases {
523            let r = sqrt(x);
524            // bit-exact check vs Rust std (the gold standard for this platform)
525            assert_eq!(r.to_bits(), x.sqrt().to_bits(), "Bit mismatch for {:e}", x);
526        }
527    }
528
529    // Manual nextUp / nextDown (unstable in this Rust version)
530    fn next_up(x: f64) -> f64 {
531        if x.is_nan() || x == f64::INFINITY {
532            return x;
533        }
534        if x == 0.0 {
535            return f64::from_bits(1);
536        }
537        let bits = x.to_bits();
538        if x > 0.0 {
539            f64::from_bits(bits + 1)
540        } else {
541            f64::from_bits(bits - 1)
542        }
543    }
544    fn next_down(x: f64) -> f64 {
545        if x.is_nan() || x == f64::NEG_INFINITY {
546            return x;
547        }
548        if x == -0.0 || x == 0.0 {
549            return f64::from_bits(0x8000_0000_0000_0001);
550        }
551        let bits = x.to_bits();
552        if x > 0.0 {
553            f64::from_bits(bits - 1)
554        } else {
555            f64::from_bits(bits + 1)
556        }
557    }
558
559    #[test]
560    fn test_powers_of_two() {
561        // All representable powers of 2 (even exponents must be exact, odd use std)
562        for exp in -1074i32..=1023 {
563            let x = if exp >= -1022 {
564                2.0f64.powi(exp)
565            } else {
566                // subnormal 2^exp = 2^(exp + 1074) * 2^-1074
567                f64::from_bits(1u64 << (exp + 1074))
568            };
569            if !x.is_finite() || x == 0.0 {
570                continue;
571            }
572            let r1 = sqrt(x);
573            let r2 = x.sqrt();
574            assert_eq!(
575                r1.to_bits(),
576                r2.to_bits(),
577                "Power-of-2 mismatch for 2^{}",
578                exp
579            );
580            // For even exponents, result should be exactly 2^(exp/2) when representable
581            if exp % 2 == 0 {
582                let expected_exp = exp / 2;
583                if expected_exp >= -1022 {
584                    let expected = 2.0f64.powi(expected_exp);
585                    assert_eq!(
586                        r1.to_bits(),
587                        expected.to_bits(),
588                        "Even power-of-2 not exact for 2^{}",
589                        exp
590                    );
591                }
592            }
593        }
594    }
595
596    #[test]
597    fn test_nextafter_edges() {
598        // nextUp / nextDown around critical points (0, 1, min_normal, max)
599        let mut edges: Vec<f64> = vec![
600            f64::from_bits(1),                     // smallest positive subnormal
601            f64::from_bits(0x0000_0000_0000_0002), // next subnormal
602            next_down(f64::MIN_POSITIVE),          // largest subnormal
603            f64::MIN_POSITIVE,                     // smallest normal
604            next_up(f64::MIN_POSITIVE),
605            next_down(1.0),
606            1.0,
607            next_up(1.0),
608            next_down(f64::MAX),
609            f64::MAX,
610        ];
611        // Also a few negative edges (should all produce NaN)
612        edges.push(next_up(-f64::MIN_POSITIVE)); // negative smallest normal-ish
613        for &x in &edges {
614            let r1 = sqrt(x);
615            let r2 = x.sqrt();
616            assert_eq!(
617                r1.to_bits(),
618                r2.to_bits(),
619                "nextafter edge mismatch for {:e} (bits {:016x})",
620                x,
621                x.to_bits()
622            );
623        }
624    }
625
626    #[test]
627    fn test_negative_subnormals() {
628        // All negative subnormals must produce NaN (sign bit set in result)
629        let mut state: u64 = 0xfeedface_deadbeef;
630        for _ in 0..10_000 {
631            state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
632            let bits = (state & 0x000f_ffff_ffff_ffff) | 0x8000_0000_0000_0000; // negative subnormal
633            let val = f64::from_bits(bits);
634            if val == 0.0 {
635                continue;
636            }
637            let r = sqrt(val);
638            assert!(
639                r.is_nan(),
640                "Negative subnormal did not produce NaN: {:e}",
641                val
642            );
643            // sign bit should be set (negative NaN)
644            assert!(
645                r.to_bits() & 0x8000_0000_0000_0000 != 0,
646                "NaN sign bit not set for negative subnormal"
647            );
648        }
649    }
650}
deep_time/utils.rs

deep_time/
utils.rs