softposit/p16e1/math/
sqrt.rs

1use super::P16E1;
2
3impl P16E1 {
4    pub const fn sqrt(self) -> Self {
5        let mut ui_a = self.to_bits();
6
7        // If sign bit is set, return NaR.
8        if (ui_a & 0x_8000) != 0 {
9            return Self::NAR;
10        }
11        // If the argument is zero, return zero.
12        if ui_a == 0 {
13            return Self::ZERO;
14        }
15        // Compute the square root. Here, k_z is the net power-of-2 scaling of the result.
16        // Decode the regime and exponent bit; scale the input to be in the range 1 to 4:
17        let mut k_z: i16;
18        if (ui_a >> 14) != 0 {
19            k_z = -1;
20            while (ui_a & 0x4000) != 0 {
21                k_z += 1;
22                ui_a <<= 1;
23            }
24        } else {
25            k_z = 0;
26            while (ui_a & 0x4000) == 0 {
27                k_z -= 1;
28                ui_a <<= 1;
29            }
30        }
31        ui_a &= 0x3fff;
32        let exp_a = 1 - (ui_a >> 13);
33        let frac_a = (ui_a | 0x2000) >> 1;
34
35        // Use table look-up of first four bits for piecewise linear approx. of 1/sqrt:
36        let index = (((frac_a >> 8) & 0xE) + exp_a) as usize;
37
38        let r0 = (crate::APPROX_RECIP_SQRT0[index] as u32
39            - (((crate::APPROX_RECIP_SQRT1[index] as u32) * ((frac_a & 0x1FF) as u32)) >> 13))
40            as u16 as u32;
41        // Use Newton-Raphson refinement to get more accuracy for 1/sqrt:
42        let mut e_sqr_r0 = (r0 * r0) >> 1;
43
44        if exp_a != 0 {
45            e_sqr_r0 >>= 1;
46        }
47        let sigma0 = 0xFFFF ^ ((0xFFFF & (((e_sqr_r0 as u64) * (frac_a as u64)) >> 18)) as u16); //~(u16) ((e_sqr_r0 * frac_a) >> 18);
48        let recip_sqrt = (r0 << 2) + ((r0 * (sigma0 as u32)) >> 23);
49
50        // We need 17 bits of accuracy for posit16 square root approximation.
51        // Multiplying 16 bits and 18 bits needs 64-bit scratch before the right shift:
52        let mut frac_z = (((frac_a as u64) * (recip_sqrt as u64)) >> 13) as u32;
53
54        // Figure out the regime and the resulting right shift of the fraction:
55        let shift: u16;
56        let mut ui_z: u16 = if k_z < 0 {
57            shift = ((-1 - k_z) >> 1) as u16;
58            0x2000 >> shift
59        } else {
60            shift = (k_z >> 1) as u16;
61            0x7fff - (0x7FFF >> (shift + 1))
62        };
63        // Set the exponent bit in the answer, if it is nonzero:
64        if (k_z & 1) != 0 {
65            ui_z |= 0x1000 >> shift;
66        }
67
68        // Right-shift fraction bits, accounting for 1 <= a < 2 versus 2 <= a < 4:
69        frac_z >>= exp_a + shift;
70
71        // Trick for eliminating off-by-one cases that only uses one multiply:
72        frac_z += 1;
73        if (frac_z & 7) == 0 {
74            let shifted_frac_z = frac_z >> 1;
75            let neg_rem = (shifted_frac_z * shifted_frac_z) & 0x3_FFFF;
76            if (neg_rem & 0x2_0000) != 0 {
77                frac_z |= 1;
78            } else if neg_rem != 0 {
79                frac_z -= 1;
80            }
81        }
82        // Strip off the hidden bit and round-to-nearest using last 4 bits.
83        frac_z -= 0x1_0000 >> shift;
84        let bit_n_plus_one = ((frac_z >> 3) & 1) != 0;
85        if bit_n_plus_one && ((((frac_z >> 4) & 1) | (frac_z & 7)) != 0) {
86            frac_z += 0x10;
87        }
88        // Assemble the result and return it.
89        Self::from_bits(ui_z | ((frac_z >> 4) as u16))
90    }
91}
92
93#[test]
94fn test_sqrt() {
95    use rand::Rng;
96    let mut rng = rand::thread_rng();
97    for _ in 0..crate::NTESTS16 {
98        let p_a: P16E1 = rng.gen();
99        let f_a = f64::from(p_a);
100        let p = p_a.sqrt();
101        let f = f_a.sqrt();
102        assert_eq!(p, P16E1::from(f));
103    }
104}