1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
use super::P16E1;

impl P16E1 {
    /// Arctangent scaled by 1/π, evaluated purely with integer fixed-point arithmetic.
    ///
    /// NOTE(review): the "divided by π" reading is taken from the method name and
    /// from the constant `0x2000_0000` that the large-argument branches subtract
    /// from — confirm against the crate-level documentation.
    ///
    /// Outline:
    /// 1. Bit patterns whose low 15 bits are all zero (zero and NaR) are returned
    ///    unchanged.
    /// 2. Negative inputs are replaced by their magnitude; the sign is reapplied
    ///    to the result at the very end.
    /// 3. The magnitude is decoded (regime, exponent, fraction) into a fixed-point
    ///    value, with one of four approximations chosen by range:
    ///    small-argument rational form, `poly(x)`, `0x2000_0000 - poly(1/x)`,
    ///    or a large-argument rational form.
    /// 4. The fixed-point result is re-encoded as a posit with round-to-nearest,
    ///    ties-to-even.
    pub const fn atan_pi(self) -> Self {
        let bits = self.to_bits();
        let negative = (bits >> 15) != 0;

        let mut fx = bits as u64;

        // Zero and NaR map to themselves.
        if (fx & 0x7FFF) == 0 {
            return self;
        }

        // Continue with |self|.
        if negative {
            fx = 0x10000 - fx;
        }

        if fx >= 0x4000 {
            // NE quadrant: the regime bit is 1.
            if fx >= 27_109 {
                // Large-value approximation, carried out in fixed point.
                let mut shift: i32 = -1;
                while (fx & 0x1000) != 0 {
                    // Each regime bit consumed is worth two binary places.
                    fx <<= 1;
                    shift += 2;
                }
                if (fx & 0x800) != 0 {
                    // Exponent bit.
                    shift += 1;
                }
                // Isolate the fraction, put the hidden bit back, then scale.
                fx = ((fx & 0x7FF) | 0x800) << shift;
                let denominator = (fx << 13) + (0x_0AA5_5000 / fx);
                fx = 0x_2000_0000 - (0x_0002_8BE5_FF80_0000 / denominator);
            } else {
                let mut shift: i32 = 0;
                while (fx & 0x2000) != 0 {
                    // Each regime bit consumed is worth two binary places.
                    fx <<= 1;
                    shift += 2;
                }
                if (fx & 0x1000) != 0 {
                    // Exponent bit.
                    shift += 1;
                }
                // Isolate the 12-bit fraction, put the hidden bit back, then scale.
                let scaled = ((fx & 0x0FFF) | 0x1000) << shift;
                // Fixed-point reciprocal, so the polynomial sees an argument <= 1.
                let reciprocal = 0x_0100_0000_0000 / scaled;
                fx = 0x_2000_0000 - poly(reciprocal);
            }
        } else if fx > 4925 {
            // SE quadrant (regime bit is 0), polynomial range.
            let mut shift: i32 = 14;
            while (fx & 0x2000) == 0 {
                // Each regime bit consumed is worth two binary places.
                fx <<= 1;
                shift -= 2;
            }
            if (fx & 0x1000) != 0 {
                // Exponent bit.
                shift += 1;
            }
            // Isolate the 12-bit fraction, put the hidden bit back, then scale.
            fx = poly(((fx & 0x0FFF) | 0x1000) << shift);
        } else {
            // SE quadrant (regime bit is 0), small-argument approximation.
            let mut shift: i32 = 13;
            while (fx & 0x1000) == 0 {
                // Each regime bit consumed is worth two binary places.
                fx <<= 1;
                shift -= 2;
            }
            if (fx & 0x800) != 0 {
                // Exponent bit.
                shift += 1;
            }
            // Isolate the fraction and put the hidden bit back.
            let mantissa = (fx & 0x7FF) | 0x800;
            // The shift may be negative here, in which case low bits are dropped.
            let scaled = if shift >= 0 {
                mantissa << shift
            } else {
                mantissa >> -shift
            };
            fx = (scaled << 30) / ((((scaled * scaled) >> 34) * 67) + 843_314_118);
        }

        // Re-encode the fixed-point value as a posit.
        // Results of 0 and 1 (minpos) are already valid encodings and pass through.
        if fx > 1 {
            let mut scale: i32 = 34;
            // Normalise so that bit 29 is the leading one.
            while (fx & 0x_2000_0000) == 0 {
                fx <<= 1;
                scale += 1;
            }
            // Flip the two top bits and write the exponent bit (from the parity
            // of `scale`) into bit 29.
            let exponent_bit = (1 ^ (scale & 1)) as u64;
            fx = (fx ^ 0x_6000_0000) | (exponent_bit << 29);
            scale >>= 1;

            // Round to nearest, ties to even: round up when the half bit is set
            // and either a lower bit is set or the kept LSB is odd.
            let half = 1_u64 << (scale - 1);
            let round_up =
                (fx & half) != 0 && ((fx & (half - 1)) != 0 || (fx & (half << 1)) != 0);
            if round_up {
                fx += half;
            }
            fx >>= scale;
        }

        // Restore the sign by two's-complement negation of the 16-bit pattern.
        let encoded = if negative { 0x10000 - fx } else { fx };
        Self::from_bits(encoded as u16)
    }
}

/// Fixed-point rational approximation helper used by `P16E1::atan_pi`.
///
/// Forms `f² >> 28`, folds it through a nested chain of
/// multiply–subtract–shift steps, and returns
/// `((f + 1) << 30) / (843_315_168 + acc)` using truncating integer division.
///
/// NOTE(review): from the call sites, `f` appears to be scaled so that
/// `1.0 == 1 << 28`, and the result so that `0.5 == 0x2000_0000` — confirm.
#[inline]
const fn poly(f: u64) -> u64 {
    // (constant, right-shift) for each nested step, applied in order.
    const STEPS: [(u64, u32); 5] = [
        (530_432, 28),
        (1_273_944, 28),
        (2_358_656, 27),
        (9_340_208, 29),
        (17_568_064, 24),
    ];

    let f_squared = (f * f) >> 28;

    // Innermost term of the chain.
    let mut acc = (f_squared * 6_969) >> 24;

    // `for` is not available in `const fn`, so walk the table with an index.
    let mut step = 0;
    while step < STEPS.len() {
        let (constant, shift) = STEPS[step];
        acc = (f_squared * (constant - acc)) >> shift;
        step += 1;
    }

    ((f + 1) << 30) / (843_315_168 + acc)
}