fast_posit/posit/ops/
add.rs

1use super::*;
2
3impl<
4  const N: u32,
5  const ES: u32,
6  Int: crate::Int,
7  const RS: u32,
8> Posit<N, ES, Int, RS> {
9  /// Return a [normalised](Decoded::is_normalised) `Decoded` that's the result of adding `x` and
10  /// `y`, plus the sticky bit.
11  ///
12  /// # Safety
13  ///
14  /// `x` and `y` have to be [normalised](Decoded::is_normalised) and cannot be symmetrical, or
15  /// calling this function is *undefined behaviour*.
16  #[inline]
17  pub(crate) unsafe fn add_kernel(x: Decoded<N, ES, RS, Int>, y: Decoded<N, ES, RS, Int>) -> (Decoded<N, ES, RS, Int>, Int) {
18    // Adding two numbers in the form `x.frac × 2^x.exp` and `y.exp × 2^y.exp` is easy, if only
19    // `x.exp` = `y.exp`: then the result would be just `(x.frac + y.exp) × 2^x.exp`. Therefore,
20    // to add two numbers we just have to (1) reduce to the same exponent, and (2) add the
21    // fractions. The remaining complications are to do with detecting over/underflows, and
22    // rounding correctly.
23
24    // First align the exponents. That is: to add `x.frac + 2^x.exp` and `y.frac + 2^y.exp` let
25    // `shift` be the difference between the exponents, add `shift` to the smallest exp, and
26    // divide the corresponding frac by `2^shift` to compensate. For example:
27    //
28    //     0b01_0110 × 2⁰
29    //   + 0b01_1000 × 2³
30    //
31    // becomes
32    //
33    //     0b00_0010110 × 2³
34    //   + 0b01_1000    × 2³
35    //
36    // because the first number has the smallest `exp`, so we add 3 to it and divide its `frac` by
37    // 2³.
38    let shift = x.exp - y.exp;
39    let (x, y) = if shift.is_positive() { (x, y) } else { (y, x) };
40    let shift = shift.abs().as_u32();
41    // One thing to keep in mind is that `shift` can exceed the width of `Int`. If this happens,
42    // then the *entire* contents of `y.frac` are shifted out, and thus the answer is just `x`.
43    if shift >= Int::BITS {  // TODO mark unlikely?
44      return (x, Int::ZERO)
45    };
46    let xfrac = x.frac;
47    let yfrac = y.frac >> shift;
48    let exp = x.exp;
49
50    // Adding two positive or two negative values: an overflow by *1 place* may occur. For example
51    //
52    //     1.25 = 0b01_0100
53    //   + 1.0  = 0b01_0000
54    //   = 2.25 = 0b10_0100
55    //
56    // If this happens, we must detect this, shift the `frac` right by 1 (i.e. divide by 2), and
57    // add 1 to exponent to compensate
58    //
59    //   = 1.125 × 2¹ = 0b01_0010, add +1 to `exp`
60    //
61    // To do this we use `overflowing_add_shift`, which may have a specialised implementation e.g.
62    // using "rotate" instructions; see [crate::underlying].
63    let (frac, overflow) = xfrac.overflowing_add_shift(yfrac);
64    let exp = exp + overflow.into();
65    // If an overflow occurs, then remember to also accumulate the shifted out bit of xfrac and
66    // yfrac into sticky.
67    let sticky_overflow = (xfrac | yfrac) & overflow.into();
68
69    // Adding a positive and a negative value: an underflow by *n places* may occur. For example
70    //
71    //     -1.25 = 0b10_1100
72    //   +  1.0  = 0b01_0000
73    //   = -0.25 = 0b11_1100
74    //
75    // If this happens, we must detect this, shift the `frac` left by `n` (i.e. multiply by 2^n),
76    // and subtract `n` to the exponent to compensate.
77    //
78    //   = -1.00 × 2¯³ = 0b10_0000
79    //
80    // To do this we use our trusty `leading_run_minus_one`, since we want to detect that the
81    // number starts with n 0s followed by a 1 or n 1s followed by a 0, and shift them so that
82    // it's just a 01 or a 10.
83    //
84    // SAFETY: x and y are not symmetrical (precondition), so `frac` cannot be 0
85    let underflow = unsafe { frac.leading_run_minus_one() };
86    let frac = frac << underflow;
87    let exp = exp - Int::of_u32(underflow);
88    // If an underflow by `n` occurs, then we need to "recover" `n` of the bits we have shifted out
89    // in `yfrac`, and add them onto the result, because we have set `yfrac = y.frac >> shift`,
90    // but actually should have set `= y.frac >> (shift - underflow)`.
91    //
92    // For example, say `y.frac = 0b11110101`, `shift = 4`, `underflow = 3`. Then
93    //
94    //    y.frac                        = 0b11110101|
95    //    y.frac >> shift               = 0b00001111|0101    ← discarded 4 bits
96    //    y.frac >> (shift - underflow) = 0b01111010|1       ← but should only discard 1
97    //
98    // Here only 1 bit should be shifted out to sticky.
99    let true_shift = shift.saturating_sub(underflow);  // TODO ver
100    let recovered = y.frac.mask_lsb(shift) >> true_shift;
101    let sticky = y.frac.mask_lsb(true_shift);
102    let frac = frac | recovered;
103
104    (Decoded{frac, exp}, (sticky | sticky_overflow))
105  }
106
107  #[inline(always)]
108  pub(super) fn add(self, other: Self) -> Self {
109    let sum = self.0.wrapping_add(other.0);
110    if self == Self::NAR || other == Self::NAR {
111      Self::NAR
112    } else if sum == Int::ZERO || sum == self.0 || sum == other.0 {
113      Self(sum)
114    } else {
115      // SAFETY: neither `self` nor `other` are 0 or NaR
116      let a = unsafe { self.decode_regular() };
117      let b = unsafe { other.decode_regular() };
118      // SAFETY: `self` and `other` aren't symmetrical
119      let (result, sticky) = unsafe { Self::add_kernel(a, b) };
120      // SAFETY: `result.is_normalised()` holds
121      unsafe { result.encode_regular_round(sticky) }
122    }
123  }
124
125  #[inline(always)]
126  pub(super) fn sub(self, other: Self) -> Self {
127    self.add(-other)
128  }
129}
130
131use core::ops::{Add, AddAssign, Sub, SubAssign};
132super::mk_ops!{Add, AddAssign, add, add_assign}
133super::mk_ops!{Sub, SubAssign, sub, sub_assign}
134
#[cfg(test)]
mod tests {
  use super::*;

  /// Tests for `+` and `+=`, generated by `mk_tests!` (defined outside this file; presumably it
  /// checks the given operators against a reference, verify against the macro definition).
  mod add {
    super::mk_tests! { +, += }
  }

  /// Tests for `-` and `-=`, generated by the same `mk_tests!` macro.
  mod sub {
    super::mk_tests! { -, -= }
  }
}
fast_posit/posit/ops/add.rs

fast_posit/posit/ops/
add.rs