1use crate::Real;
2
3#[inline]
5pub(crate) const fn clamp_i128_to_i64(x: i128) -> i64 {
6 if x > i64::MAX as i128 {
7 i64::MAX
8 } else if x < i64::MIN as i128 {
9 i64::MIN
10 } else {
11 x as i64
12 }
13}
14
15pub const fn sin_approx(x: Real) -> Real {
86 const PI: Real = f!(core::f64::consts::PI);
87 const TWO_PI: Real = f!(2.0) * PI;
88
89 let mut x = x % TWO_PI;
93 if x < f!(0.0) {
94 x += TWO_PI;
95 }
96 if x > PI {
97 x -= TWO_PI;
98 }
99
100 let sign = if x < f!(0.0) { f!(-1.0) } else { f!(1.0) };
103 let x = x.abs();
104
105 let x = if x > PI / f!(2.0) { PI - x } else { x };
106
107 let y = x * x;
117
118 let p = f!(-1.0) / f!(1307674368000.0); let p = p * y + f!(1.0) / f!(6227020800.0); let p = p * y + f!(-1.0) / f!(39916800.0); let p = p * y + f!(1.0) / f!(362880.0); let p = p * y + f!(-1.0) / f!(5040.0); let p = p * y + f!(1.0) / f!(120.0); let p = p * y + f!(-1.0) / f!(6.0); let p = p * y + f!(1.0); sign * (x * p)
131}
132
133pub(crate) const fn floor_f(x: Real) -> Real {
138 if x.is_nan() || x.is_infinite() {
139 x
140 } else if x == f!(0.0) {
141 x } else {
143 let i = x as i64;
144 let truncated = f!(i);
145 if x >= f!(0.0) || truncated == x {
146 truncated
147 } else {
148 truncated - f!(1.0)
149 }
150 }
151}
152
153const LN2_HI: f64 = 6.93147180369123816490e-01; const LN2_LO: f64 = 1.90821492927058770002e-10; const LG1: f64 = 6.666666666666735130e-01; const LG2: f64 = 3.999999999940941908e-01; const LG3: f64 = 2.857142874366239149e-01; const LG4: f64 = 2.222219843214978396e-01; const LG5: f64 = 1.818357216161805012e-01; const LG6: f64 = 1.531383769920937332e-01; const LG7: f64 = 1.479819860511658591e-01; pub const fn log(mut x: f64) -> f64 {
165 let x1p54 = f64::from_bits(0x4350000000000000); let mut ui = x.to_bits();
168 let mut hx: u32 = (ui >> 32) as u32;
169 let mut k: i32 = 0;
170
171 if (hx < 0x00100000) || ((hx >> 31) != 0) {
172 if ui << 1 == 0 {
174 return -1. / (x * x); }
176 if hx >> 31 != 0 {
177 return (x - x) / 0.0; }
179 k -= 54;
181 x *= x1p54;
182 ui = x.to_bits();
183 hx = (ui >> 32) as u32;
184 } else if hx >= 0x7ff00000 {
185 return x;
186 } else if hx == 0x3ff00000 && ui << 32 == 0 {
187 return 0.;
188 }
189
190 hx += 0x3ff00000 - 0x3fe6a09e;
192 k += ((hx >> 20) as i32) - 0x3ff;
193 hx = (hx & 0x000fffff) + 0x3fe6a09e;
194 ui = ((hx as u64) << 32) | (ui & 0xffffffff);
195 x = f64::from_bits(ui);
196
197 let f: f64 = x - 1.0;
198 let hfsq: f64 = 0.5 * f * f;
199 let s: f64 = f / (2.0 + f);
200 let z: f64 = s * s;
201 let w: f64 = z * z;
202 let t1: f64 = w * (LG2 + w * (LG4 + w * LG6));
203 let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7)));
204 let r: f64 = t2 + t1;
205 let dk: f64 = k as f64;
206 s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI
207}
208
209const RSQRT_TAB: [u16; 128] = [
214 0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43, 0xaa14, 0xa8eb, 0xa7c8, 0xa6aa,
215 0xa592, 0xa480, 0xa373, 0xa26b, 0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
216 0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430, 0x936b, 0x92a9, 0x91ea, 0x912e,
217 0x9075, 0x8fbe, 0x8f0a, 0x8e59, 0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
218 0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479, 0x83ec, 0x8361, 0x82d8, 0x8250,
219 0x81c9, 0x8145, 0x80c2, 0x8040, 0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
220 0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2, 0xe443, 0xe2dc, 0xe17a, 0xe020,
221 0xdecb, 0xdd7d, 0xdc34, 0xdaf1, 0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
222 0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f, 0xc858, 0xc764, 0xc674, 0xc587,
223 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4, 0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
224 0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
225];
226
227#[inline]
228const fn mul32(a: u32, b: u32) -> u32 {
229 ((a as u64).wrapping_mul(b as u64) >> 32) as u32
230}
231
232#[inline]
233const fn mul64(a: u64, b: u64) -> u64 {
234 let ahi = a >> 32;
235 let alo = a & 0xffffffff;
236 let bhi = b >> 32;
237 let blo = b & 0xffffffff;
238 ahi.wrapping_mul(bhi)
239 .wrapping_add(ahi.wrapping_mul(blo) >> 32)
240 .wrapping_add(alo.wrapping_mul(bhi) >> 32)
241}
242
243pub const fn sqrt(x: f64) -> f64 {
247 let mut ix = x.to_bits();
248 let mut top = ix >> 52;
249
250 if top.wrapping_sub(0x001) >= 0x7fe {
252 if ix << 1 == 0 {
253 return x; }
255 if ix == 0x7ff0_0000_0000_0000 {
256 return x; }
258 if ix > 0x7ff0_0000_0000_0000 {
259 let nan_bits = 0x7ff8_0000_0000_0000 | (ix & 0x8000_0000_0000_0000);
261 return f64::from_bits(nan_bits);
262 }
263 let scale = f64::from_bits(0x4330_0000_0000_0000); ix = (x * scale).to_bits();
266 top = (ix >> 52).wrapping_sub(52);
267 }
268
269 let even = top & 1;
270 let mut m = (ix << 11) | 0x8000_0000_0000_0000u64;
271 if even != 0 {
272 m >>= 1;
273 }
274 let top = (top.wrapping_add(0x3ff)) >> 1; let three: u64 = 0xc000_0000;
279 let i = ((ix >> 46) % 128) as usize;
280 let mut r: u64 = (RSQRT_TAB[i] as u64) << 16;
281
282 let mut s: u64 = mul32((m >> 32) as u32, r as u32) as u64;
283 let mut d: u64 = mul32(s as u32, r as u32) as u64;
284 let mut u: u64 = three - d;
285 r = (mul32(r as u32, u as u32) << 1) as u64;
286 s = (mul32(s as u32, u as u32) << 1) as u64;
287
288 d = mul32(s as u32, r as u32) as u64;
289 u = three - d;
290 r = (mul32(r as u32, u as u32) << 1) as u64;
291
292 r <<= 32;
293 s = mul64(m, r);
294 d = mul64(s, r);
295 u = (three << 32) - d;
296 s = mul64(s, u);
297
298 s = (s - 2) >> 9;
300
301 let d0 = (m << 42).wrapping_sub(s.wrapping_mul(s));
302 let d1 = s.wrapping_sub(d0);
303 let _d2 = d1.wrapping_add(s).wrapping_add(1);
304
305 if (d1 >> 63) != 0 {
306 s = s.wrapping_add(1);
307 }
308 s &= 0x000f_ffff_ffff_ffff;
309 s |= (top as u64) << 52;
310
311 f64::from_bits(s)
312}
313
314const SPLIT: f64 = 134217728. + 1.; const fn sq(x: f64) -> (f64, f64) {
317 let xh: f64;
318 let xl: f64;
319 let xc: f64;
320
321 xc = x * SPLIT;
322 xh = x - xc + xc;
323 xl = x - xh;
324 let hi = x * x;
325 let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
326 (hi, lo)
327}
328
329pub const fn hypot(mut x: f64, mut y: f64) -> f64 {
330 let x1p700 = f64::from_bits(0x6bb0000000000000); let x1p_700 = f64::from_bits(0x1430000000000000); let mut uxi = x.to_bits();
334 let mut uyi = y.to_bits();
335 let uti;
336 let ex: i64;
337 let ey: i64;
338 let mut z: f64;
339
340 uxi &= -1i64 as u64 >> 1;
342 uyi &= -1i64 as u64 >> 1;
343 if uxi < uyi {
344 uti = uxi;
345 uxi = uyi;
346 uyi = uti;
347 }
348
349 ex = (uxi >> 52) as i64;
351 ey = (uyi >> 52) as i64;
352 x = f64::from_bits(uxi);
353 y = f64::from_bits(uyi);
354 if ey == 0x7ff {
356 return y;
357 }
358 if ex == 0x7ff || uyi == 0 {
359 return x;
360 }
361 if ex - ey > 64 {
364 return x + y;
365 }
366
367 z = 1.;
370 if ex > 0x3ff + 510 {
371 z = x1p700;
372 x *= x1p_700;
373 y *= x1p_700;
374 } else if ey < 0x3ff - 450 {
375 z = x1p_700;
376 x *= x1p700;
377 y *= x1p700;
378 }
379 let (hx, lx) = sq(x);
380 let (hy, ly) = sq(y);
381 z * sqrt(ly + lx + hy + hx)
382}
383
384#[cfg(feature = "std")]
385#[cfg(test)]
386mod tests {
387 use super::sqrt;
388 use std::{eprintln, f64, vec, vec::Vec};
389
390 #[test]
391 fn test_special_cases() {
392 assert_eq!(sqrt(0.0), 0.0);
393 assert_eq!(sqrt(-0.0), -0.0);
394 assert!(sqrt(f64::INFINITY).is_infinite() && sqrt(f64::INFINITY) > 0.0);
395 assert!(sqrt(f64::NEG_INFINITY).is_nan());
396 assert!(sqrt(-1.0).is_nan());
397 assert!(sqrt(f64::NAN).is_nan());
398 }
400
401 #[test]
402 fn test_perfect_squares() {
403 for i in 0..100u32 {
404 let x = (i * i) as f64;
405 let r = sqrt(x);
406 assert!((r - i as f64).abs() < 1e-10 || r.is_nan());
407 }
408 }
409
410 #[test]
411 fn test_random_vs_std() {
412 let mut failures = 0u32;
414 let mut state: u64 = 0x123456789abcdef0;
415 for _ in 0..5_000_000 {
416 state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
417 let bits = (state & 0x000f_ffff_ffff_ffff) | 0x3ff0_0000_0000_0000; let val = f64::from_bits(bits);
419 let r1 = sqrt(val);
420 let r2 = val.sqrt();
421 if r1.to_bits() != r2.to_bits() {
422 failures += 1;
423 if failures < 3 {
424 eprintln!(
425 "Mismatch at {:016x}: ours={:016x} std={:016x}",
426 bits,
427 r1.to_bits(),
428 r2.to_bits()
429 );
430 }
431 }
432 }
433 assert_eq!(
434 failures, 0,
435 "Found {} mismatches in 5M random normals [1,2)",
436 failures
437 );
438 }
439
440 #[test]
441 fn test_subnormals_random() {
442 let mut failures = 0u32;
444 let mut state: u64 = 0xdeadbeefcafebabe;
445 for _ in 0..100_000 {
446 state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
447 let bits = state & 0x000f_ffff_ffff_ffff; let val = f64::from_bits(bits);
450 if val == 0.0 {
451 continue;
452 } let r1 = sqrt(val);
454 let r2 = val.sqrt();
455 if r1.to_bits() != r2.to_bits() {
456 failures += 1;
457 if failures < 3 {
458 eprintln!(
459 "Subnormal mismatch at {:016x}: ours={:016x} std={:016x}",
460 bits,
461 r1.to_bits(),
462 r2.to_bits()
463 );
464 }
465 }
466 }
467 assert_eq!(
468 failures, 0,
469 "Found {} mismatches in 100k random subnormals",
470 failures
471 );
472 }
473
474 #[test]
475 fn test_boundaries() {
476 let boundaries: [f64; 8] = [
478 f64::MIN_POSITIVE, f64::from_bits(0x0010_0000_0000_0000), f64::from_bits(0x000f_ffff_ffff_ffff), 2.0f64.powi(-1074), f64::MAX, f64::from_bits(0x7fe0_0000_0000_0000), 2.0f64.powi(1023), 2.0f64.powi(-1022) * (1.0 + f64::EPSILON), ];
487 for &x in &boundaries {
488 let r1 = sqrt(x);
489 let r2 = x.sqrt();
490 assert_eq!(r1.to_bits(), r2.to_bits(), "Boundary mismatch for {:e}", x);
491 if x > 0.0 && x.is_finite() && x > 1e-200 {
493 let xx = x * x;
494 if xx.is_finite() && xx.is_normal() {
495 let r = sqrt(xx);
496 let rel = ((r - x).abs() / x).max(0.0);
497 assert!(
498 rel < 1e-14 || r.is_nan(),
499 "sqrt(x*x) not close to x for {}",
500 x
501 );
502 }
503 }
504 }
505 }
506
507 #[test]
508 fn test_known_hard_cases() {
509 let cases: &[f64] = &[
511 2.0,
512 0.5,
513 4.0,
514 9.0,
515 0.0,
516 f64::INFINITY,
517 1.0e-300, f64::from_bits(0x0010_0000_0000_0001), 1.0 + f64::EPSILON, f64::from_bits(0x7fefffffffffffff), ];
522 for &x in cases {
523 let r = sqrt(x);
524 assert_eq!(r.to_bits(), x.sqrt().to_bits(), "Bit mismatch for {:e}", x);
526 }
527 }
528
529 fn next_up(x: f64) -> f64 {
531 if x.is_nan() || x == f64::INFINITY {
532 return x;
533 }
534 if x == 0.0 {
535 return f64::from_bits(1);
536 }
537 let bits = x.to_bits();
538 if x > 0.0 {
539 f64::from_bits(bits + 1)
540 } else {
541 f64::from_bits(bits - 1)
542 }
543 }
544 fn next_down(x: f64) -> f64 {
545 if x.is_nan() || x == f64::NEG_INFINITY {
546 return x;
547 }
548 if x == -0.0 || x == 0.0 {
549 return f64::from_bits(0x8000_0000_0000_0001);
550 }
551 let bits = x.to_bits();
552 if x > 0.0 {
553 f64::from_bits(bits - 1)
554 } else {
555 f64::from_bits(bits + 1)
556 }
557 }
558
559 #[test]
560 fn test_powers_of_two() {
561 for exp in -1074i32..=1023 {
563 let x = if exp >= -1022 {
564 2.0f64.powi(exp)
565 } else {
566 f64::from_bits(1u64 << (exp + 1074))
568 };
569 if !x.is_finite() || x == 0.0 {
570 continue;
571 }
572 let r1 = sqrt(x);
573 let r2 = x.sqrt();
574 assert_eq!(
575 r1.to_bits(),
576 r2.to_bits(),
577 "Power-of-2 mismatch for 2^{}",
578 exp
579 );
580 if exp % 2 == 0 {
582 let expected_exp = exp / 2;
583 if expected_exp >= -1022 {
584 let expected = 2.0f64.powi(expected_exp);
585 assert_eq!(
586 r1.to_bits(),
587 expected.to_bits(),
588 "Even power-of-2 not exact for 2^{}",
589 exp
590 );
591 }
592 }
593 }
594 }
595
596 #[test]
597 fn test_nextafter_edges() {
598 let mut edges: Vec<f64> = vec![
600 f64::from_bits(1), f64::from_bits(0x0000_0000_0000_0002), next_down(f64::MIN_POSITIVE), f64::MIN_POSITIVE, next_up(f64::MIN_POSITIVE),
605 next_down(1.0),
606 1.0,
607 next_up(1.0),
608 next_down(f64::MAX),
609 f64::MAX,
610 ];
611 edges.push(next_up(-f64::MIN_POSITIVE)); for &x in &edges {
614 let r1 = sqrt(x);
615 let r2 = x.sqrt();
616 assert_eq!(
617 r1.to_bits(),
618 r2.to_bits(),
619 "nextafter edge mismatch for {:e} (bits {:016x})",
620 x,
621 x.to_bits()
622 );
623 }
624 }
625
626 #[test]
627 fn test_negative_subnormals() {
628 let mut state: u64 = 0xfeedface_deadbeef;
630 for _ in 0..10_000 {
631 state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
632 let bits = (state & 0x000f_ffff_ffff_ffff) | 0x8000_0000_0000_0000; let val = f64::from_bits(bits);
634 if val == 0.0 {
635 continue;
636 }
637 let r = sqrt(val);
638 assert!(
639 r.is_nan(),
640 "Negative subnormal did not produce NaN: {:e}",
641 val
642 );
643 assert!(
645 r.to_bits() & 0x8000_0000_0000_0000 != 0,
646 "NaN sign bit not set for negative subnormal"
647 );
648 }
649 }
650}