rustc_apfloat/
ppc.rs

1//! Support for floating point types that use PowerPC semantics.
2
3use crate::ieee;
4use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd};
5
6use core::cmp::Ordering;
7use core::fmt;
8use core::ops::Neg;
9
10/// A larger floating point number represented by two smaller floats.
11#[must_use]
12#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
13pub struct DoubleFloat<F>(F, F);
14
15/// 128-bit floating point number comprised of two IEEE [`Double`](ieee::Double) values.
16///
17/// This is the "IBM Extended Double" format, described at
18/// <https://www.ibm.com/docs/en/aix/7.3?topic=sepl-128-bit-long-double-floating-point-data-type>.
19pub type DoubleDouble = DoubleFloat<ieee::Double>;
20
21// These are legacy semantics for the Fallback, inaccurate implementation of
22// IBM double-double, if the accurate DoubleDouble doesn't handle the
23// operation. It's equivalent to having an IEEE number with consecutive 106
24// bits of mantissa and 11 bits of exponent.
25//
26// It's not equivalent to IBM double-double. For example, a legit IBM
27// double-double, 1 + epsilon:
28//
29//   1 + epsilon = 1 + (1 >> 1076)
30//
31// is not representable by a consecutive 106 bits of mantissa.
32//
33// Currently, these semantics are used in the following way:
34//
35//   DoubleDouble -> (Double, Double) ->
36//   DoubleDouble's Fallback -> IEEE operations
37//
38// FIXME: Implement all operations in DoubleDouble, and delete these
39// semantics.
40// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
41pub struct FallbackS<F>(F);
42type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>;
43impl<F: Float> ieee::Semantics for FallbackS<F> {
44    // Forbid any conversion to/from bits.
45    const BITS: usize = 0;
46    const EXP_BITS: usize = 0;
47
48    const PRECISION: usize = F::PRECISION * 2;
49    const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
50    const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt;
51}
52
53// Convert number to F. To avoid spurious underflows, we re-
54// normalize against the F exponent range first, and only *then*
55// truncate the mantissa. The result of that second conversion
56// may be inexact, but should never underflow.
57// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
58pub struct FallbackExtendedS<F>(F);
59type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>;
60impl<F: Float> ieee::Semantics for FallbackExtendedS<F> {
61    // Forbid any conversion to/from bits.
62    const BITS: usize = 0;
63    const EXP_BITS: usize = 0;
64
65    const PRECISION: usize = Fallback::<F>::PRECISION;
66    const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
67    const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt;
68}
69
70impl<F: Float> From<Fallback<F>> for DoubleFloat<F>
71where
72    F: FloatConvert<FallbackExtended<F>>,
73    FallbackExtended<F>: FloatConvert<F>,
74{
75    fn from(x: Fallback<F>) -> Self {
76        let mut status;
77        let mut loses_info = false;
78
79        let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info));
80        assert_eq!((status, loses_info), (Status::OK, false));
81
82        let a = unpack!(status=, extended.convert(&mut loses_info));
83        assert_eq!(status - Status::INEXACT, Status::OK);
84
85        // If conversion was exact or resulted in a special case, we're done;
86        // just set the second double to zero. Otherwise, re-convert back to
87        // the extended format and compute the difference. This now should
88        // convert exactly to double.
89        let b = if a.is_finite_non_zero() && loses_info {
90            let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info));
91            assert_eq!((status, loses_info), (Status::OK, false));
92            let v = unpack!(status=, extended - u);
93            assert_eq!(status, Status::OK);
94            let v = unpack!(status=, v.convert(&mut loses_info));
95            assert_eq!((status, loses_info), (Status::OK, false));
96            v
97        } else {
98            F::ZERO
99        };
100
101        DoubleFloat(a, b)
102    }
103}
104
105impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> {
106    fn from(DoubleFloat(a, b): DoubleFloat<F>) -> Self {
107        let mut status;
108        let mut loses_info = false;
109
110        // Get the first F and convert to our format.
111        let a = unpack!(status=, a.convert(&mut loses_info));
112        assert_eq!((status, loses_info), (Status::OK, false));
113
114        // Unless we have a special case, add in second F.
115        if a.is_finite_non_zero() {
116            let b = unpack!(status=, b.convert(&mut loses_info));
117            assert_eq!((status, loses_info), (Status::OK, false));
118
119            (a + b).value
120        } else {
121            a
122        }
123    }
124}
125
126float_common_impls!(DoubleFloat<F>);
127
128impl<F: Float> Neg for DoubleFloat<F> {
129    type Output = Self;
130    fn neg(self) -> Self {
131        if self.1.is_finite_non_zero() {
132            DoubleFloat(-self.0, -self.1)
133        } else {
134            DoubleFloat(-self.0, self.1)
135        }
136    }
137}
138
139impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> {
140    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
141        fmt::Display::fmt(&Fallback::from(*self), f)
142    }
143}
144
145impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F>
146where
147    Self: From<Fallback<F>>,
148{
149    const BITS: usize = F::BITS * 2;
150    const PRECISION: usize = Fallback::<F>::PRECISION;
151    const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP;
152    const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP;
153
154    const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO);
155
156    const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO);
157
158    // FIXME(eddyb) remove when qnan becomes const fn.
159    const NAN: Self = DoubleFloat(F::NAN, F::ZERO);
160
161    fn qnan(payload: Option<u128>) -> Self {
162        DoubleFloat(F::qnan(payload), F::ZERO)
163    }
164
165    fn snan(payload: Option<u128>) -> Self {
166        DoubleFloat(F::snan(payload), F::ZERO)
167    }
168
169    fn largest() -> Self {
170        let status;
171        let mut r = DoubleFloat(F::largest(), F::largest());
172        r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1));
173        r.1 = unpack!(status=, r.1.next_down());
174        assert_eq!(status, Status::OK);
175        r
176    }
177
178    const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO);
179
180    fn smallest_normalized() -> Self {
181        DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO)
182    }
183
184    // Implement addition, subtraction, multiplication and division based on:
185    // "Software for Doubled-Precision Floating-Point Computations",
186    // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
187
188    fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
189        match (self.category(), rhs.category()) {
190            (Category::Infinity, Category::Infinity) => {
191                if self.is_negative() != rhs.is_negative() {
192                    Status::INVALID_OP.and(Self::NAN.copy_sign(self))
193                } else {
194                    Status::OK.and(self)
195                }
196            }
197
198            (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self),
199
200            (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs),
201
202            (Category::Normal, Category::Normal) => {
203                let mut status = Status::OK;
204                let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1);
205                let mut z = a;
206                z = unpack!(status|=, z.add_r(c, round));
207                if !z.is_finite() {
208                    if !z.is_infinite() {
209                        return status.and(DoubleFloat(z, F::ZERO));
210                    }
211                    status = Status::OK;
212                    let a_cmp_c = a.cmp_abs_normal(c);
213                    z = cc;
214                    z = unpack!(status|=, z.add_r(aa, round));
215                    if a_cmp_c == Ordering::Greater {
216                        // z = cc + aa + c + a;
217                        z = unpack!(status|=, z.add_r(c, round));
218                        z = unpack!(status|=, z.add_r(a, round));
219                    } else {
220                        // z = cc + aa + a + c;
221                        z = unpack!(status|=, z.add_r(a, round));
222                        z = unpack!(status|=, z.add_r(c, round));
223                    }
224                    if !z.is_finite() {
225                        return status.and(DoubleFloat(z, F::ZERO));
226                    }
227                    self.0 = z;
228                    let mut zz = aa;
229                    zz = unpack!(status|=, zz.add_r(cc, round));
230                    if a_cmp_c == Ordering::Greater {
231                        // self.1 = a - z + c + zz;
232                        self.1 = a;
233                        self.1 = unpack!(status|=, self.1.sub_r(z, round));
234                        self.1 = unpack!(status|=, self.1.add_r(c, round));
235                        self.1 = unpack!(status|=, self.1.add_r(zz, round));
236                    } else {
237                        // self.1 = c - z + a + zz;
238                        self.1 = c;
239                        self.1 = unpack!(status|=, self.1.sub_r(z, round));
240                        self.1 = unpack!(status|=, self.1.add_r(a, round));
241                        self.1 = unpack!(status|=, self.1.add_r(zz, round));
242                    }
243                } else {
244                    // q = a - z;
245                    let mut q = a;
246                    q = unpack!(status|=, q.sub_r(z, round));
247
248                    // zz = q + c + (a - (q + z)) + aa + cc;
249                    // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
250                    let mut zz = q;
251                    zz = unpack!(status|=, zz.add_r(c, round));
252                    q = unpack!(status|=, q.add_r(z, round));
253                    q = unpack!(status|=, q.sub_r(a, round));
254                    q = -q;
255                    zz = unpack!(status|=, zz.add_r(q, round));
256                    zz = unpack!(status|=, zz.add_r(aa, round));
257                    zz = unpack!(status|=, zz.add_r(cc, round));
258                    if zz.is_zero() && !zz.is_negative() {
259                        return Status::OK.and(DoubleFloat(z, F::ZERO));
260                    }
261                    self.0 = z;
262                    self.0 = unpack!(status|=, self.0.add_r(zz, round));
263                    if !self.0.is_finite() {
264                        self.1 = F::ZERO;
265                        return status.and(self);
266                    }
267                    self.1 = z;
268                    self.1 = unpack!(status|=, self.1.sub_r(self.0, round));
269                    self.1 = unpack!(status|=, self.1.add_r(zz, round));
270                }
271                status.and(self)
272            }
273        }
274    }
275
276    fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
277        // Interesting observation: For special categories, finding the lowest
278        // common ancestor of the following layered graph gives the correct
279        // return category:
280        //
281        //    NaN
282        //   /   \
283        // Zero  Inf
284        //   \   /
285        //   Normal
286        //
287        // e.g. NaN * NaN = NaN
288        //      Zero * Inf = NaN
289        //      Normal * Zero = Zero
290        //      Normal * Inf = Inf
291        match (self.category(), rhs.category()) {
292            (Category::NaN, _) => Status::OK.and(self),
293
294            (_, Category::NaN) => Status::OK.and(rhs),
295
296            (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN),
297
298            (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self),
299
300            (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs),
301
302            (Category::Normal, Category::Normal) => {
303                let mut status = Status::OK;
304                let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1);
305                // t = a * c
306                let mut t = a;
307                t = unpack!(status|=, t.mul_r(c, round));
308                if !t.is_finite_non_zero() {
309                    return status.and(DoubleFloat(t, F::ZERO));
310                }
311
312                // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
313                let mut tau = a;
314                tau = unpack!(status|=, tau.mul_add_r(c, -t, round));
315                // v = a * d
316                let mut v = a;
317                v = unpack!(status|=, v.mul_r(d, round));
318                // w = b * c
319                let mut w = b;
320                w = unpack!(status|=, w.mul_r(c, round));
321                v = unpack!(status|=, v.add_r(w, round));
322                // tau += v + w
323                tau = unpack!(status|=, tau.add_r(v, round));
324                // u = t + tau
325                let mut u = t;
326                u = unpack!(status|=, u.add_r(tau, round));
327
328                self.0 = u;
329                if !u.is_finite() {
330                    self.1 = F::ZERO;
331                } else {
332                    // self.1 = (t - u) + tau
333                    t = unpack!(status|=, t.sub_r(u, round));
334                    t = unpack!(status|=, t.add_r(tau, round));
335                    self.1 = t;
336                }
337                status.and(self)
338            }
339        }
340    }
341
342    fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
343        Fallback::from(self)
344            .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round)
345            .map(Self::from)
346    }
347
348    fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
349        Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from)
350    }
351
352    fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> {
353        Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from)
354    }
355
356    fn c_fmod(self, rhs: Self) -> StatusAnd<Self> {
357        Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from)
358    }
359
360    fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
361        Fallback::from(self).round_to_integral(round).map(Self::from)
362    }
363
364    fn next_up(self) -> StatusAnd<Self> {
365        Fallback::from(self).next_up().map(Self::from)
366    }
367
368    fn from_bits(input: u128) -> Self {
369        let (a, b) = (input, input >> F::BITS);
370        DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1)))
371    }
372
373    fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
374        Fallback::from_u128_r(input, round).map(Self::from)
375    }
376
377    fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
378        Fallback::from_str_r(s, round).map(|r| r.map(Self::from))
379    }
380
381    fn to_bits(self) -> u128 {
382        self.0.to_bits() | (self.1.to_bits() << F::BITS)
383    }
384
385    fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
386        Fallback::from(self).to_u128_r(width, round, is_exact)
387    }
388
389    fn cmp_abs_normal(self, rhs: Self) -> Ordering {
390        self.0.cmp_abs_normal(rhs.0).then_with(|| {
391            let result = self.1.cmp_abs_normal(rhs.1);
392            if result != Ordering::Equal {
393                let against = self.0.is_negative() ^ self.1.is_negative();
394                let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative();
395                (!against)
396                    .cmp(&!rhs_against)
397                    .then_with(|| if against { result.reverse() } else { result })
398            } else {
399                result
400            }
401        })
402    }
403
404    fn bitwise_eq(self, rhs: Self) -> bool {
405        self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1)
406    }
407
408    fn is_negative(self) -> bool {
409        self.0.is_negative()
410    }
411
412    fn is_denormal(self) -> bool {
413        self.category() == Category::Normal
414            && (self.0.is_denormal() || self.0.is_denormal() ||
415          // (double)(Hi + Lo) == Hi defines a normal number.
416          self.0 !=  (self.0 + self.1).value)
417    }
418
419    fn is_signaling(self) -> bool {
420        self.0.is_signaling()
421    }
422
423    fn category(self) -> Category {
424        self.0.category()
425    }
426
427    fn is_integer(self) -> bool {
428        self.0.is_integer() && self.1.is_integer()
429    }
430
431    fn get_exact_inverse(self) -> Option<Self> {
432        Fallback::from(self).get_exact_inverse().map(Self::from)
433    }
434
435    fn ilogb(self) -> ExpInt {
436        self.0.ilogb()
437    }
438
439    fn scalbn_r(self, exp: ExpInt, round: Round) -> Self {
440        DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round))
441    }
442
443    fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self {
444        let a = self.0.frexp_r(exp, round);
445        let mut b = self.1;
446        if self.category() == Category::Normal {
447            b = b.scalbn_r(-*exp, round);
448        }
449        DoubleFloat(a, b)
450    }
451}
452
453// HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat`
454// has private fields, and it's not worth it to make them public just for testing.
455#[test]
456fn is_integer() {
457    let double_from_f64 = |f: f64| ieee::Double::from_bits(f.to_bits().into());
458    assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer());
459    assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer());
460    assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer());
461}