1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
use super::Q16E1;
use crate::P16E1;
use core::ops;

crate::macros::quire_add_sub!(P16E1, Q16E1);
crate::macros::quire_add_sub_array!(P16E1, Q16E1, 1, 2, 3, 4);

pub(super) fn fdp(q: &mut Q16E1, mut ui_a: u16, mut ui_b: u16, plus: bool) {
    let u_z1 = q.to_bits();

    if q.is_nar() || ui_a == 0x_8000 || ui_b == 0x_8000 {
        *q = Q16E1::NAR;
        return;
    } else if ui_a == 0 || ui_b == 0 {
        return;
    }

    //max pos (sign plus and minus)
    let sign_a = P16E1::sign_ui(ui_a);
    let sign_b = P16E1::sign_ui(ui_b);
    let sign_z2 = sign_a ^ sign_b;

    if sign_a {
        ui_a = ui_a.wrapping_neg();
    }
    if sign_b {
        ui_b = ui_b.wrapping_neg();
    }

    let (mut k_a, mut exp_a, frac_a) = P16E1::separate_bits(ui_a);

    let (k_b, exp_b, frac_b) = P16E1::separate_bits(ui_b);
    k_a += k_b;
    exp_a += exp_b;
    let mut frac32_z = (frac_a as u32) * (frac_b as u32);

    if exp_a > 1 {
        k_a += 1;
        exp_a ^= 0x2;
    }

    let rcarry = (frac32_z >> 29) != 0; //3rd bit (position 2) of frac32_z, hidden bit is 4th bit (position 3)
    if rcarry {
        if exp_a != 0 {
            k_a += 1;
        }
        exp_a ^= 1;
        frac32_z >>= 1;
    }

    //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127.
    //Scale = 2^es * k + e  => 2k + e
    let shift_right = -(28 + ((k_a as i16) << 1) + (exp_a as i16));
    let mut u_z2 = if shift_right < 0 {
        (frac32_z as u128) << -shift_right
    } else {
        (frac32_z as u128) >> shift_right
    };

    if !(sign_z2 ^ plus) {
        u_z2 = u_z2.wrapping_neg();
    }
    //Addition
    let u_z = u_z2.wrapping_add(u_z1);

    //Exception handling for NaR
    let q_z = Q16E1::from_bits(u_z);
    *q = if q_z.is_nar() { Q16E1::ZERO } else { q_z }
}

pub(super) fn fdp_one(q: &mut Q16E1, mut ui_a: u16, plus: bool) {
    let u_z1 = q.to_bits();

    if q.is_nar() || ui_a == 0x_8000 {
        *q = Q16E1::NAR;
        return;
    } else if ui_a == 0 {
        return;
    }

    //max pos (sign plus and minus)
    let sign_a = P16E1::sign_ui(ui_a);

    if sign_a {
        ui_a = ui_a.wrapping_neg();
    }

    let (mut k_a, mut exp_a, frac_a) = P16E1::separate_bits(ui_a);
    let mut frac32_z = (frac_a as u32) << 14;

    if exp_a > 1 {
        k_a += 1;
        exp_a ^= 0x2;
    }

    let rcarry = (frac32_z >> 29) != 0; //3rd bit (position 2) of frac32_z, hidden bit is 4th bit (position 3)
    if rcarry {
        if exp_a != 0 {
            k_a += 1;
        }
        exp_a ^= 1;
        frac32_z >>= 1;
    }

    //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127.
    //Scale = 2^es * k + e  => 2k + e
    let shift_right = -(28 + ((k_a as i16) << 1) + (exp_a as i16));
    let mut u_z2 = if shift_right < 0 {
        (frac32_z as u128) << -shift_right
    } else {
        (frac32_z as u128) >> shift_right
    };

    if !(sign_a ^ plus) {
        u_z2 = u_z2.wrapping_neg();
    }
    //Addition
    let u_z = u_z2.wrapping_add(u_z1);

    //Exception handling for NaR
    let q_z = Q16E1::from_bits(u_z);
    *q = if q_z.is_nar() { Q16E1::ZERO } else { q_z }
}

#[cfg(test)]
fn ulp(x: P16E1, y: P16E1) -> i16 {
    let xi = x.to_bits() as i16;
    let yi = y.to_bits() as i16;
    (xi - yi).abs()
}

#[test]
fn test_quire_mul_add() {
    use rand::Rng;
    let mut rng = rand::thread_rng();
    for _ in 0..crate::NTESTS16 {
        let p_a: P16E1 = rng.gen();
        let p_b: P16E1 = rng.gen();
        let p_c: P16E1 = rng.gen();
        let f_a = f64::from(p_a);
        let f_b = f64::from(p_b);
        let f_c = f64::from(p_c);
        let mut q = Q16E1::init();
        q += (p_a, p_b);
        q += p_c;
        let p = q.to_posit();
        let f = f_a.mul_add(f_b, f_c);
        assert!(ulp(p, P16E1::from(f)) <= 1);
    }
}

#[test]
fn test_quire_mul_sub() {
    use rand::Rng;
    let mut rng = rand::thread_rng();
    for _ in 0..crate::NTESTS16 {
        let p_a: P16E1 = rng.gen();
        let p_b: P16E1 = rng.gen();
        let p_c: P16E1 = rng.gen();
        let f_a = f64::from(p_a);
        let f_b = f64::from(p_b);
        let f_c = f64::from(p_c);
        let mut q = Q16E1::init();
        q -= (p_a, p_b);
        q += p_c;
        let p = q.to_posit();
        let f = (-f_a).mul_add(f_b, f_c);
        assert!(
            ulp(p, P16E1::from(f)) <= 1 /*, "p_a = {}\tp_b = {}\tp_c = {}\tp = {}\tf = {}", p_a, p_b, p_c, p, f*/
        );
    }
}