1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
use super::*;
impl<
const N: u32,
const ES: u32,
Int: crate::Int,
const RS: u32,
> Posit<N, ES, Int, RS> {
/// Return a [normalised](Decoded::is_normalised) `Decoded` that's the result of adding `x` and
/// `y`, plus the sticky bit.
///
/// # Safety
///
/// `x` and `y` have to be [normalised](Decoded::is_normalised) and cannot be symmetrical, or
/// calling this function is *undefined behaviour*.
#[inline]
pub(crate) unsafe fn add_kernel(x: Decoded<N, ES, RS, Int>, y: Decoded<N, ES, RS, Int>) -> (Decoded<N, ES, RS, Int>, Int) {
// Adding two numbers in the form `x.frac × 2^x.exp` and `y.exp × 2^y.exp` is easy, if only
// `x.exp` = `y.exp`: then the result would be just `(x.frac + y.exp) × 2^x.exp`. Therefore,
// to add two numbers we just have to (1) reduce to the same exponent, and (2) add the
// fractions. The remaining complications are to do with detecting over/underflows, and
// rounding correctly.
// First align the exponents. That is: to add `x.frac + 2^x.exp` and `y.frac + 2^y.exp` let
// `shift` be the difference between the exponents, add `shift` to the smallest exp, and
// divide the corresponding frac by `2^shift` to compensate. For example:
//
// 0b01_0110 × 2⁰
// + 0b01_1000 × 2³
//
// becomes
//
// 0b00_0010110 × 2³
// + 0b01_1000 × 2³
//
// because the first number has the smallest `exp`, so we add 3 to it and divide its `frac` by
// 2³.
let shift = x.exp - y.exp;
let (x, y) = if shift.is_positive() { (x, y) } else { (y, x) };
let shift = shift.abs().as_u32();
// One thing to keep in mind is that `shift` can exceed the width of `Int`. If this happens,
// then the *entire* contents of `y.frac` are shifted out, and thus the answer is just `x`.
if shift >= Int::BITS { // TODO mark unlikely?
return (x, Int::ZERO)
};
let xfrac = x.frac;
let yfrac = y.frac >> shift;
let exp = x.exp;
// Adding two positive or two negative values: an overflow by *1 place* may occur. For example
//
// 1.25 = 0b01_0100
// + 1.0 = 0b01_0000
// = 2.25 = 0b10_0100
//
// If this happens, we must detect this, shift the `frac` right by 1 (i.e. divide by 2), and
// add 1 to exponent to compensate
//
// = 1.125 × 2¹ = 0b01_0010, add +1 to `exp`
//
// To do this we use `overflowing_add_shift`, which may have a specialised implementation e.g.
// using "rotate" instructions; see [crate::underlying].
let (frac, overflow) = xfrac.overflowing_add_shift(yfrac);
let exp = exp + overflow.into();
// If an overflow occurs, then remember to also accumulate the shifted out bit of xfrac and
// yfrac into sticky.
let sticky_overflow = (xfrac | yfrac) & overflow.into();
// Adding a positive and a negative value: an underflow by *n places* may occur. For example
//
// -1.25 = 0b10_1100
// + 1.0 = 0b01_0000
// = -0.25 = 0b11_1100
//
// If this happens, we must detect this, shift the `frac` left by `n` (i.e. multiply by 2^n),
// and subtract `n` to the exponent to compensate.
//
// = -1.00 × 2¯³ = 0b10_0000
//
// To do this we use our trusty `leading_run_minus_one`, since we want to detect that the
// number starts with n 0s followed by a 1 or n 1s followed by a 0, and shift them so that
// it's just a 01 or a 10.
//
// SAFETY: x and y are not symmetrical (precondition), so `frac` cannot be 0
let underflow = unsafe { frac.leading_run_minus_one() };
let frac = frac << underflow;
let exp = exp - Int::of_u32(underflow);
// If an underflow by `n` occurs, then we need to "recover" `n` of the bits we have shifted out
// in `yfrac`, and add them onto the result, because we have set `yfrac = y.frac >> shift`,
// but actually should have set `= y.frac >> (shift - underflow)`.
//
// For example, say `y.frac = 0b11110101`, `shift = 4`, `underflow = 3`. Then
//
// y.frac = 0b11110101|
// y.frac >> shift = 0b00001111|0101 ← discarded 4 bits
// y.frac >> (shift - underflow) = 0b01111010|1 ← but should only discard 1
//
// Here only 1 bit should be shifted out to sticky.
let true_shift = shift.saturating_sub(underflow); // TODO ver
let recovered = y.frac.mask_lsb(shift) >> true_shift;
let sticky = y.frac.mask_lsb(true_shift);
let frac = frac | recovered;
(Decoded{frac, exp}, (sticky | sticky_overflow))
}
#[inline(always)]
pub(super) fn add(self, other: Self) -> Self {
let sum = self.0.wrapping_add(other.0);
if self == Self::NAR || other == Self::NAR {
Self::NAR
} else if sum == Int::ZERO || sum == self.0 || sum == other.0 {
Self(sum)
} else {
// SAFETY: neither `self` nor `other` are 0 or NaR
let a = unsafe { self.decode_regular() };
let b = unsafe { other.decode_regular() };
// SAFETY: `self` and `other` aren't symmetrical
let (result, sticky) = unsafe { Self::add_kernel(a, b) };
// SAFETY: `result.is_normalised()` holds
unsafe { result.encode_regular_round(sticky) }
}
}
#[inline(always)]
pub(super) fn sub(self, other: Self) -> Self {
self.add(-other)
}
}
use core::ops::{Add, AddAssign, Sub, SubAssign};
super::mk_ops!{Add, AddAssign, add, add_assign}
super::mk_ops!{Sub, SubAssign, sub, sub_assign}
#[cfg(test)]
mod tests {
use super::*;
mod add {
super::mk_tests!{+, +=}
}
mod sub {
super::mk_tests!{-, -=}
}
}