extended/
lib.rs

1//! Extended-precision 80-bit floating-point numbers (f80).
2
3#[warn(missing_docs)]
4
5use std::convert::From;
6
7/// An 80-bit extended floating-point number.
8/// 
9/// See Apple Numerics Manual, 2nd edition (1988), p. 18 "SANE Data Types".
10#[derive(Debug, Copy, Clone, PartialEq)]
11pub struct Extended {
12    // The sign is stored as the high bit. The low 15 bits contain the exponent,
13	// with a bias of 16383.
14    pub sign_exponent: u16,
15
16    // The fraction includes a ones place as the high bit. The value in the ones
17	// place may be zero.
18    pub fraction: u64,
19}
20
21const MAX_EXPONENT_64: u32 = (1 << 11) - 1;
22
23impl Extended {
24    /// Create an extended 80-bit floating-point number from its big endian
25    /// representation.
26    pub fn from_be_bytes(b: [u8; 10]) -> Self {
27        Extended {
28            sign_exponent: u16::from_be_bytes(b[0..2].try_into().unwrap()),
29            fraction: u64::from_be_bytes(b[2..10].try_into().unwrap()),
30        }
31    }
32
33    /// Create an extended 80-bit floating-point number from its little endian
34    /// representation.
35    pub fn from_le_bytes(b: [u8; 10]) -> Self {
36        Extended {
37            sign_exponent: u16::from_le_bytes(b[8..10].try_into().unwrap()),
38            fraction: u64::from_le_bytes(b[0..8].try_into().unwrap()),
39        }
40    }
41
42    /// Convert an 80-bit floating-point number to its big endian
43    /// representation.
44    pub fn to_be_bytes(&self) -> [u8; 10] {
45        let mut b = [0u8; 10];
46        b[0..2].copy_from_slice(&self.sign_exponent.to_be_bytes());
47        b[2..10].copy_from_slice(&self.fraction.to_be_bytes());
48        b
49    }
50
51    /// Convert an 80-bit floating-point number to its big endian
52    /// representation.
53    pub fn to_le_bytes(&self) -> [u8; 10] {
54        let mut b = [0u8; 10];
55        b[8..10].copy_from_slice(&self.sign_exponent.to_le_bytes());
56        b[0..8].copy_from_slice(&self.fraction.to_le_bytes());
57        b
58    }
59
60    /// Convert to a 64-bit floating-point number. Values which are out of range
61    /// are flushed to infinity or zero.
62    pub fn to_f64(&self) -> f64 {
63        const INFINITY: u64 = (MAX_EXPONENT_64 as u64) << 52;
64        const NAN: u64 = u64::MAX >> 1;
65        let exponent = i32::from(self.sign_exponent) & 0x7fff;
66        let bits = if exponent == 0x7fff {
67            if self.fraction == 0 {
68                INFINITY
69            } else {
70                NAN
71            }
72        } else if self.fraction == 0 {
73            0
74        } else {
75            // 2^(e64 - 1023) * 1.fraction
76            // = 2^(e80 - 16383) * 1.fraction / 2^nzero
77            // e63 - 1023 = e80 - 16383
78            // e63 = e80 - 16383 + 1023 - nzero
79            let nzero = self.fraction.leading_zeros();
80            let exponent = exponent - 16383 + 1023 - (nzero as i32);
81            let fraction = self.fraction << nzero;
82            // Fraction is of the form 1.xxxxx.
83            if exponent <= 0 {
84                // Subnormal numbers.
85                let shift = 12 - exponent;
86                let (fraction, rem) = if shift > 64 {
87                    (0, 0)
88                } else if shift == 64 {
89                    (0, fraction)
90                } else {
91                    (fraction >> shift, fraction << (64 - shift))
92                };
93                // The (fraction & 1) makes this round to even.
94                if (rem | (fraction & 1)) <= (1 << 63) {
95                    fraction
96                } else {
97                    fraction + 1
98                }
99            } else {
100                // Round it to 52 bits. The addition of ((fraction >> 11) & 1)
101                // makes this round to even.
102                let rem = (fraction & ((1 << 11) - 1)) | ((fraction >> 11) & 1);
103                let fraction = (fraction >> 11) & ((1 << 52) - 1);
104                let (exponent, fraction) = if rem <= (1 << 10) {
105                    (exponent, fraction)
106                } else if fraction < (1 << 52) - 1 {
107                    (exponent, fraction + 1)
108                } else {
109                    (exponent + 1, 0)
110                };
111                if exponent >= (MAX_EXPONENT_64 as i32) {
112                    // Out of range.
113                    INFINITY
114                } else {
115                    fraction | ((exponent as u64) << 52)
116                }
117            }
118        };
119        let sign = (u64::from(self.sign_exponent) & 0x8000) << 48;
120        f64::from_bits(bits | sign)
121    }
122}
123
124impl From<f64> for Extended {
125    fn from(x: f64) -> Self {
126        let bits = x.to_bits();
127        let sign = ((bits >> (63 - 15)) as u32) & 0x8000;
128        let exponent = ((bits >> 52) as u32) & MAX_EXPONENT_64;
129        let mantissa = bits & ((1 << 52) - 1);
130        if exponent == 0 {
131            // Zero or subnormal.
132            // Number is (-1)^sign * 2^-1022 * 0.mantissa.
133            if mantissa == 0 {
134                Extended {
135                    sign_exponent: sign as u16,
136                    fraction: 0,
137                }
138            } else {
139                // 2^-1022 * 0.mantissa = 2^(e-16383) * 2^lzero * 0.mantissa
140                // -1022 = e - 16383 + lzero
141                // e = -1022 + 16383 - lzero
142                let nzero = mantissa.leading_zeros();
143                let exponent = 16383 - 1022 + 11 - nzero;
144                Extended {
145                    sign_exponent: (sign | exponent) as u16,
146                    fraction: mantissa << nzero,
147                }
148            }
149        } else if exponent == MAX_EXPONENT_64 {
150            // Infinity or NaN.
151            Extended {
152                sign_exponent: (sign | 0x7fff) as u16,
153                fraction: if mantissa == 0 { 0 } else { u64::MAX },
154            }
155        } else {
156            // 2^(e64 - 1023) * 1.fraction = 2^(e80 - 16383) * 1.fraction
157            // e63 - 1023 = e80 - 16383
158            // e80 = e63 + 16383 - 1023
159            let exponent = exponent + 16383 - 1023;
160            Extended {
161                sign_exponent: (sign | exponent) as u16,
162                fraction: (1 << 63) | (mantissa << 11),
163            }
164        }
165    }
166}
167
168impl From<f32> for Extended {
169    fn from(x: f32) -> Self {
170        f64::from(x).into()
171    }
172}
173
174impl From<i32> for Extended {
175    fn from(x: i32) -> Self {
176        f64::from(x).into()
177    }
178}
179
180impl From<u32> for Extended {
181    fn from(x: u32) -> Self {
182        f64::from(x).into()
183    }
184}
185
186#[cfg(test)]
187mod test {
188    use super::*;
189
190    fn equal_f64(x: f64, y: f64) -> bool {
191        if x.is_nan() {
192            y.is_nan()
193        } else {
194            x == y
195        }
196    }
197
198    #[test]
199    fn test_to_f64() {
200        const CASES: &[(u16, u64, f64)] = &[
201            // Easy.
202            (16383, 1 << 63, 1.0),
203            (16384, 1 << 63, 2.0),
204            (16382, 1 << 63, 0.5),
205            // Next after 1.0.
206            (16383, (1 << 63) + (1 << 11), 1.0000000000000002),
207            // Rounds to even.
208            (16383, (1 << 63) + (1 << 10), 1.0),
209            (16383, (1 << 63) + (1 << 10) + 1, 1.0000000000000002),
210            (16383, (1 << 63) + (1 << 11), 1.0000000000000002),
211            (16383, (1 << 63) + (3 << 10) - 1, 1.0000000000000002),
212            (16383, (1 << 63) + (3 << 10), 1.0000000000000004),
213            // Rounds to next exponent.
214            (16381, u64::MAX, 0.5),
215            // Is infinity.
216            (32767, 0, f64::INFINITY),
217            // Out of range.
218            (32000, 1 << 63, f64::INFINITY),
219            (32000, u64::MAX, f64::INFINITY),
220            (17406, 0xfffffffffffff800, 1.7976931348623157e+308),
221            (17406, 0xfffffffffffffbff, 1.7976931348623157e+308),
222            (17406, 0xfffffffffffffc00, f64::INFINITY),
223            // Zero.
224            (0, 0, 0.0),
225            // NaN.
226            (32767, 1, f64::NAN),
227            (32767, 1 << 63, f64::NAN),
228            // Smallest normal.
229            (15361, 1 << 63, 2.2250738585072014e-308),
230            // Subnormal.
231            (15360, 1 << 63, 1.1125369292536007e-308),
232            // Smallest subnormal.
233            (15309, 1 << 63, 5e-324),
234            // Rounds up to smallest subnormal.
235            (15308, (1 << 63) + 1, 5e-324),
236            (15308, 1 << 63, 0.0),
237            // Very small.
238            (10000, 1 << 63, 0.0),
239        ];
240        let mut failed = false;
241        for (n, &(exponent, fraction, expect)) in CASES.iter().enumerate() {
242            for sign in 0..2 {
243                let exponent = exponent | ((sign as u16) << 15);
244                let fin = Extended { sign_exponent: exponent, fraction };
245                let fout = fin.to_f64();
246                let expect = if sign == 0 { expect } else { -expect };
247                if !equal_f64(fout, expect) {
248                    failed = true;
249                    eprintln!(
250                        "Case {}: Input = {:04x}:{:016x}, Output = {:?}, Expected = {:?}",
251                        n, exponent, fraction, fout, expect
252                    );
253                }
254            }
255        }
256        if failed {
257            panic!("test failed");
258        }
259    }
260
261    #[test]
262    fn test_from_f64() {
263        const CASES: &[(u16, u64, f64)] = &[
264            // Easy.
265            (16383, 1 << 63, 1.0),
266            (16384, 1 << 63, 2.0),
267            (16382, 1 << 63, 0.5),
268            (16383 - 10, 1 << 63, 0.0009765625),
269            (16383 - 100, 1 << 63, 7.888609052210118e-31),
270            // Next after 1.0.
271            (16383, (1 << 63) + (1 << 11), 1.0000000000000002),
272            // Is infinity.
273            (32767, 0, f64::INFINITY),
274            // Zero.
275            (0, 0, 0.0),
276            // NaN.
277            (32767, u64::MAX, f64::NAN),
278            // Smallest normal.
279            (15361, 1 << 63, 2.2250738585072014e-308),
280            // Subnormal.
281            (15360, 1 << 63, 1.1125369292536007e-308),
282            // // Smallest subnormal.
283            (15309, 1 << 63, 5e-324),
284        ];
285        let mut failed = false;
286        for (n, &(exponent, fraction, fin)) in CASES.iter().enumerate() {
287            for sign in 0..2 {
288                let exponent = exponent | ((sign as u16) << 15);
289                let fin = if sign == 0 { fin } else { -fin };
290                let fout = Extended::from(fin);
291                let expect = Extended { sign_exponent: exponent, fraction };
292                if fout != expect {
293                    failed = true;
294                    eprintln!(
295                        "Case {}: Input = {:?}, Output = {:04x}:{:016x}, Expected = {:04x}:{:016x}",
296                        n, fin, fout.sign_exponent, fout.fraction, expect.sign_exponent, expect.fraction
297                    );
298                    continue;
299                }
300                // Round-trip sanity check.
301                let rev = fout.to_f64();
302                if !equal_f64(fin, rev) {
303                    failed = true;
304                    eprintln!(
305                        "Case {}: Round trip faied: {:?} -> {:04x}:{:016x} -> {:?}",
306                        n, fin, fout.sign_exponent, fout.fraction, rev
307                    );
308                }
309            }
310        }
311        if failed {
312            panic!("test failed");
313        }
314    }
315}