// fast_posit/posit/ops/add.rs
1use super::*;
2
3impl<
4 const N: u32,
5 const ES: u32,
6 Int: crate::Int,
7 const RS: u32,
8> Posit<N, ES, Int, RS> {
9 /// Return a [normalised](Decoded::is_normalised) `Decoded` that's the result of adding `x` and
10 /// `y`, plus the sticky bit.
11 ///
12 /// # Safety
13 ///
14 /// `x` and `y` have to be [normalised](Decoded::is_normalised) and cannot be symmetrical, or
15 /// calling this function is *undefined behaviour*.
16 #[inline]
17 pub(crate) unsafe fn add_kernel(x: Decoded<N, ES, RS, Int>, y: Decoded<N, ES, RS, Int>) -> (Decoded<N, ES, RS, Int>, Int) {
18 // Adding two numbers in the form `x.frac × 2^x.exp` and `y.exp × 2^y.exp` is easy, if only
19 // `x.exp` = `y.exp`: then the result would be just `(x.frac + y.exp) × 2^x.exp`. Therefore,
20 // to add two numbers we just have to (1) reduce to the same exponent, and (2) add the
21 // fractions. The remaining complications are to do with detecting over/underflows, and
22 // rounding correctly.
23
24 // First align the exponents. That is: to add `x.frac + 2^x.exp` and `y.frac + 2^y.exp` let
25 // `shift` be the difference between the exponents, add `shift` to the smallest exp, and
26 // divide the corresponding frac by `2^shift` to compensate. For example:
27 //
28 // 0b01_0110 × 2⁰
29 // + 0b01_1000 × 2³
30 //
31 // becomes
32 //
33 // 0b00_0010110 × 2³
34 // + 0b01_1000 × 2³
35 //
36 // because the first number has the smallest `exp`, so we add 3 to it and divide its `frac` by
37 // 2³.
38 let shift = x.exp - y.exp;
39 let (x, y) = if shift.is_positive() { (x, y) } else { (y, x) };
40 let shift = shift.abs().as_u32();
41 // One thing to keep in mind is that `shift` can exceed the width of `Int`. If this happens,
42 // then the *entire* contents of `y.frac` are shifted out, and thus the answer is just `x`.
43 if shift >= Int::BITS { // TODO mark unlikely?
44 return (x, Int::ZERO)
45 };
46 let xfrac = x.frac;
47 let yfrac = y.frac >> shift;
48 let exp = x.exp;
49
50 // Adding two positive or two negative values: an overflow by *1 place* may occur. For example
51 //
52 // 1.25 = 0b01_0100
53 // + 1.0 = 0b01_0000
54 // = 2.25 = 0b10_0100
55 //
56 // If this happens, we must detect this, shift the `frac` right by 1 (i.e. divide by 2), and
57 // add 1 to exponent to compensate
58 //
59 // = 1.125 × 2¹ = 0b01_0010, add +1 to `exp`
60 //
61 // To do this we use `overflowing_add_shift`, which may have a specialised implementation e.g.
62 // using "rotate" instructions; see [crate::underlying].
63 let (frac, overflow) = xfrac.overflowing_add_shift(yfrac);
64 let exp = exp + overflow.into();
65 // If an overflow occurs, then remember to also accumulate the shifted out bit of xfrac and
66 // yfrac into sticky.
67 let sticky_overflow = (xfrac | yfrac) & overflow.into();
68
69 // Adding a positive and a negative value: an underflow by *n places* may occur. For example
70 //
71 // -1.25 = 0b10_1100
72 // + 1.0 = 0b01_0000
73 // = -0.25 = 0b11_1100
74 //
75 // If this happens, we must detect this, shift the `frac` left by `n` (i.e. multiply by 2^n),
76 // and subtract `n` to the exponent to compensate.
77 //
78 // = -1.00 × 2¯³ = 0b10_0000
79 //
80 // To do this we use our trusty `leading_run_minus_one`, since we want to detect that the
81 // number starts with n 0s followed by a 1 or n 1s followed by a 0, and shift them so that
82 // it's just a 01 or a 10.
83 //
84 // SAFETY: x and y are not symmetrical (precondition), so `frac` cannot be 0
85 let underflow = unsafe { frac.leading_run_minus_one() };
86 let frac = frac << underflow;
87 let exp = exp - Int::of_u32(underflow);
88 // If an underflow by `n` occurs, then we need to "recover" `n` of the bits we have shifted out
89 // in `yfrac`, and add them onto the result, because we have set `yfrac = y.frac >> shift`,
90 // but actually should have set `= y.frac >> (shift - underflow)`.
91 //
92 // For example, say `y.frac = 0b11110101`, `shift = 4`, `underflow = 3`. Then
93 //
94 // y.frac = 0b11110101|
95 // y.frac >> shift = 0b00001111|0101 ← discarded 4 bits
96 // y.frac >> (shift - underflow) = 0b01111010|1 ← but should only discard 1
97 //
98 // Here only 1 bit should be shifted out to sticky.
99 let true_shift = shift.saturating_sub(underflow); // TODO ver
100 let recovered = y.frac.mask_lsb(shift) >> true_shift;
101 let sticky = y.frac.mask_lsb(true_shift);
102 let frac = frac | recovered;
103
104 (Decoded{frac, exp}, (sticky | sticky_overflow))
105 }
106
107 #[inline(always)]
108 pub(super) fn add(self, other: Self) -> Self {
109 let sum = self.0.wrapping_add(other.0);
110 if self == Self::NAR || other == Self::NAR {
111 Self::NAR
112 } else if sum == Int::ZERO || sum == self.0 || sum == other.0 {
113 Self(sum)
114 } else {
115 // SAFETY: neither `self` nor `other` are 0 or NaR
116 let a = unsafe { self.decode_regular() };
117 let b = unsafe { other.decode_regular() };
118 // SAFETY: `self` and `other` aren't symmetrical
119 let (result, sticky) = unsafe { Self::add_kernel(a, b) };
120 // SAFETY: `result.is_normalised()` holds
121 unsafe { result.encode_regular_round(sticky) }
122 }
123 }
124
125 #[inline(always)]
126 pub(super) fn sub(self, other: Self) -> Self {
127 self.add(-other)
128 }
129}
130
131use core::ops::{Add, AddAssign, Sub, SubAssign};
132super::mk_ops!{Add, AddAssign, add, add_assign}
133super::mk_ops!{Sub, SubAssign, sub, sub_assign}
134
#[cfg(test)]
mod tests {
    use super::*;

    // Each sub-module instantiates the shared `mk_tests!` macro for one operator and its
    // compound-assignment form. The tests themselves are defined by that macro, not here.

    /// Tests for `+` and `+=`.
    mod add {
        super::mk_tests!(+, +=);
    }

    /// Tests for `-` and `-=`.
    mod sub {
        super::mk_tests!(-, -=);
    }
}