fast-posit 0.2.0

Software implementation of the Posit floating point format
Documentation
use super::*;

impl<
  const N: u32,
  const ES: u32,
  Int: crate::Int,
  const RS: u32,
> Posit<N, ES, Int, RS> {
  /// Return a [normalised](Decoded::is_normalised) `Decoded` that's the result of adding `x` and
  /// `y`, plus the sticky bit.
  ///
  /// # Safety
  ///
  /// `x` and `y` have to be [normalised](Decoded::is_normalised) and cannot be symmetrical, or
  /// calling this function is *undefined behaviour*.
  #[inline]
  pub(crate) unsafe fn add_kernel(x: Decoded<N, ES, RS, Int>, y: Decoded<N, ES, RS, Int>) -> (Decoded<N, ES, RS, Int>, Int) {
    // Adding two numbers in the form `x.frac × 2^x.exp` and `y.exp × 2^y.exp` is easy, if only
    // `x.exp` = `y.exp`: then the result would be just `(x.frac + y.exp) × 2^x.exp`. Therefore,
    // to add two numbers we just have to (1) reduce to the same exponent, and (2) add the
    // fractions. The remaining complications are to do with detecting over/underflows, and
    // rounding correctly.

    // First align the exponents. That is: to add `x.frac + 2^x.exp` and `y.frac + 2^y.exp` let
    // `shift` be the difference between the exponents, add `shift` to the smallest exp, and
    // divide the corresponding frac by `2^shift` to compensate. For example:
    //
    //     0b01_0110 × 2⁰
    //   + 0b01_1000 × 2³
    //
    // becomes
    //
    //     0b00_0010110 × 2³
    //   + 0b01_1000    × 2³
    //
    // because the first number has the smallest `exp`, so we add 3 to it and divide its `frac` by
    // 2³.
    let shift = x.exp - y.exp;
    let (x, y) = if shift.is_positive() { (x, y) } else { (y, x) };
    let shift = shift.abs().as_u32();
    // One thing to keep in mind is that `shift` can exceed the width of `Int`. If this happens,
    // then the *entire* contents of `y.frac` are shifted out, and thus the answer is just `x`.
    if shift >= Int::BITS {  // TODO mark unlikely?
      return (x, Int::ZERO)
    };
    let xfrac = x.frac;
    let yfrac = y.frac >> shift;
    let exp = x.exp;

    // Adding two positive or two negative values: an overflow by *1 place* may occur. For example
    //
    //     1.25 = 0b01_0100
    //   + 1.0  = 0b01_0000
    //   = 2.25 = 0b10_0100
    //
    // If this happens, we must detect this, shift the `frac` right by 1 (i.e. divide by 2), and
    // add 1 to exponent to compensate
    //
    //   = 1.125 × 2¹ = 0b01_0010, add +1 to `exp`
    //
    // To do this we use `overflowing_add_shift`, which may have a specialised implementation e.g.
    // using "rotate" instructions; see [crate::underlying].
    let (frac, overflow) = xfrac.overflowing_add_shift(yfrac);
    let exp = exp + overflow.into();
    // If an overflow occurs, then remember to also accumulate the shifted out bit of xfrac and
    // yfrac into sticky.
    let sticky_overflow = (xfrac | yfrac) & overflow.into();

    // Adding a positive and a negative value: an underflow by *n places* may occur. For example
    //
    //     -1.25 = 0b10_1100
    //   +  1.0  = 0b01_0000
    //   = -0.25 = 0b11_1100
    //
    // If this happens, we must detect this, shift the `frac` left by `n` (i.e. multiply by 2^n),
    // and subtract `n` to the exponent to compensate.
    //
    //   = -1.00 × 2¯³ = 0b10_0000
    //
    // To do this we use our trusty `leading_run_minus_one`, since we want to detect that the
    // number starts with n 0s followed by a 1 or n 1s followed by a 0, and shift them so that
    // it's just a 01 or a 10.
    //
    // SAFETY: x and y are not symmetrical (precondition), so `frac` cannot be 0
    let underflow = unsafe { frac.leading_run_minus_one() };
    let frac = frac << underflow;
    let exp = exp - Int::of_u32(underflow);
    // If an underflow by `n` occurs, then we need to "recover" `n` of the bits we have shifted out
    // in `yfrac`, and add them onto the result, because we have set `yfrac = y.frac >> shift`,
    // but actually should have set `= y.frac >> (shift - underflow)`.
    //
    // For example, say `y.frac = 0b11110101`, `shift = 4`, `underflow = 3`. Then
    //
    //    y.frac                        = 0b11110101|
    //    y.frac >> shift               = 0b00001111|0101    ← discarded 4 bits
    //    y.frac >> (shift - underflow) = 0b01111010|1       ← but should only discard 1
    //
    // Here only 1 bit should be shifted out to sticky.
    let true_shift = shift.saturating_sub(underflow);  // TODO ver
    let recovered = y.frac.mask_lsb(shift) >> true_shift;
    let sticky = y.frac.mask_lsb(true_shift);
    let frac = frac | recovered;

    (Decoded{frac, exp}, (sticky | sticky_overflow))
  }

  #[inline(always)]
  pub(super) fn add(self, other: Self) -> Self {
    let sum = self.0.wrapping_add(other.0);
    if self == Self::NAR || other == Self::NAR {
      Self::NAR
    } else if sum == Int::ZERO || sum == self.0 || sum == other.0 {
      Self(sum)
    } else {
      // SAFETY: neither `self` nor `other` are 0 or NaR
      let a = unsafe { self.decode_regular() };
      let b = unsafe { other.decode_regular() };
      // SAFETY: `self` and `other` aren't symmetrical
      let (result, sticky) = unsafe { Self::add_kernel(a, b) };
      // SAFETY: `result.is_normalised()` holds
      unsafe { result.encode_regular_round(sticky) }
    }
  }

  #[inline(always)]
  pub(super) fn sub(self, other: Self) -> Self {
    self.add(-other)
  }
}

use core::ops::{Add, AddAssign, Sub, SubAssign};
super::mk_ops!{Add, AddAssign, add, add_assign}
super::mk_ops!{Sub, SubAssign, sub, sub_assign}

#[cfg(test)]
mod tests {
  use super::*;

  mod add {
    super::mk_tests!{+, +=}
  }

  mod sub {
    super::mk_tests!{-, -=}
  }
}