// fast_posit/posit/ops/add.rs

use super::*;

3impl<
4  const N: u32,
5  const ES: u32,
6  Int: crate::Int,
7> Posit<N, ES, Int> {
8  /// Return a [normalised](Decoded::is_normalised) `Decoded` that's the result of adding `x` and
9  /// `y`, plus the sticky bit.
10  ///
11  /// # Safety
12  ///
13  /// `x` and `y` have to be [normalised](Decoded::is_normalised) and cannot be symmetrical, or
14  /// calling this function is *undefined behaviour*.
15  #[inline]
16  pub(crate) unsafe fn add_kernel(x: Decoded<N, ES, Int>, y: Decoded<N, ES, Int>) -> (Decoded<N, ES, Int>, Int) {
17    // Adding two numbers in the form `x.frac × 2^x.exp` and `y.exp × 2^y.exp` is easy, if only
18    // `x.exp` = `y.exp`: then the result would be just `(x.frac + y.exp) × 2^x.exp`. Therefore,
19    // to add two numbers we just have to (1) reduce to the same exponent, and (2) add the
20    // fractions. The remaining complications are to do with detecting over/underflows, and
21    // rounding correctly.
22
23    // First align the exponents. That is: to add `x.frac + 2^x.exp` and `y.frac + 2^y.exp` let
24    // `shift` be the difference between the exponents, add `shift` to the smallest exp, and
25    // divide the corresponding frac by `2^shift` to compensate. For example:
26    //
27    //     0b01_0110 × 2⁰
28    //   + 0b01_1000 × 2³
29    //
30    // becomes
31    //
32    //     0b00_0010110 × 2³
33    //   + 0b01_1000    × 2³
34    //
35    // because the first number has the smallest `exp`, so we add 3 to it and divide its `frac` by
36    // 2³.
37    let shift = x.exp - y.exp;
38    let (x, y) = if shift.is_positive() { (x, y) } else { (y, x) };
39    let shift = shift.abs().as_u32();
40    // One thing to keep in mind is that `shift` can exceed the width of `Int`. If this happens,
41    // then the *entire* contents of `y.frac` are shifted out, and thus the answer is just `x`.
42    if shift >= Int::BITS {  // TODO mark unlikely?
43      return (x, Int::ZERO)
44    };
45    let xfrac = x.frac;
46    let yfrac = y.frac >> shift;
47    let exp = x.exp;
48
49    // Adding two positive or two negative values: an overflow by *1 place* may occur. For example
50    //
51    //     1.25 = 0b01_0100
52    //   + 1.0  = 0b01_0000
53    //   = 2.25 = 0b10_0100
54    //
55    // If this happens, we must detect this, shift the `frac` right by 1 (i.e. divide by 2), and
56    // add 1 to exponent to compensate
57    //
58    //   = 1.125 × 2¹ = 0b01_0010, add +1 to `exp`
59    //
60    // To do this we use `overflowing_add_shift`, which may have a specialised implementation e.g.
61    // using "rotate" instructions; see [crate::underlying].
62    let (frac, overflow) = xfrac.overflowing_add_shift(yfrac);
63    let exp = exp + overflow.into();
64    // If an overflow occurs, then remember to also accumulate the shifted out bit of xfrac and
65    // yfrac into sticky.
66    let sticky_overflow = (xfrac | yfrac) & overflow.into();
67
68    // Adding a positive and a negative value: an underflow by *n places* may occur. For example
69    //
70    //     -1.25 = 0b10_1100
71    //   +  1.0  = 0b01_0000
72    //   = -0.25 = 0b11_1100
73    //
74    // If this happens, we must detect this, shift the `frac` left by `n` (i.e. multiply by 2^n),
75    // and subtract `n` to the exponent to compensate.
76    //
77    //   = -1.00 × 2¯³ = 0b10_0000
78    //
79    // To do this we use our trusty `leading_run_minus_one`, since we want to detect that the
80    // number starts with n 0s followed by a 1 or n 1s followed by a 0, and shift them so that
81    // it's just a 01 or a 10.
82    //
83    // SAFETY: x and y are not symmetrical (precondition), so `frac` cannot be 0
84    let underflow = unsafe { frac.leading_run_minus_one() };
85    let frac = frac << underflow;
86    let exp = exp - Int::of_u32(underflow);
87    // If an underflow by `n` occurs, then we need to "recover" `n` of the bits we have shifted out
88    // in `yfrac`, and add them onto the result, because we have set `yfrac = y.frac >> shift`,
89    // but actually should have set `= y.frac >> (shift - underflow)`.
90    //
91    // For example, say `y.frac = 0b11110101`, `shift = 4`, `underflow = 3`. Then
92    //
93    //    y.frac                        = 0b11110101|
94    //    y.frac >> shift               = 0b00001111|0101    ← discarded 4 bits
95    //    y.frac >> (shift - underflow) = 0b01111010|1       ← but should only discard 1
96    //
97    // Here only 1 bit should be shifted out to sticky.
98    let true_shift = shift.saturating_sub(underflow);  // TODO ver
99    let recovered = y.frac.mask_lsb(shift) >> true_shift;
100    let sticky = y.frac.mask_lsb(true_shift);
101    let frac = frac | recovered;
102
103    (Decoded{frac, exp}, (sticky | sticky_overflow))
104  }
105
106  pub(crate) fn add(self, other: Self) -> Self {
107    let sum = self.0.wrapping_add(other.0);
108    if self == Self::NAR || other == Self::NAR {
109      Self::NAR
110    } else if sum == Int::ZERO || sum == self.0 || sum == other.0 {
111      Self(sum)
112    } else {
113      // SAFETY: neither `self` nor `other` are 0 or NaR
114      let a = unsafe { self.decode_regular() };
115      let b = unsafe { other.decode_regular() };
116      // SAFETY: `self` and `other` aren't symmetrical
117      let (result, sticky) = unsafe { Self::add_kernel(a, b) };
118      // SAFETY: `result.is_normalised()` holds
119      unsafe { result.encode_regular_round(sticky) }
120    }
121  }
122
123  pub(crate) fn sub(self, other: Self) -> Self {
124    self.add(-other)
125  }
126}
127
128use core::ops::{Add, AddAssign, Sub, SubAssign};
129super::mk_ops!{Add, AddAssign, add, add_assign}
130super::mk_ops!{Sub, SubAssign, sub, sub_assign}
131
#[cfg(test)]
mod tests {
  use super::*;

  // `mk_tests!` is reached through the `use super::*` above and is not defined in this file.
  // Presumably it generates the test-suite for the given binary operator and its assigning
  // form — confirm against the macro definition.

  mod add {
    super::mk_tests!{+, +=}
  }

  mod sub {
    super::mk_tests!{-, -=}
  }
}
143}